diff --git a/package-lock.json b/package-lock.json
index 7b793c1..532ae49 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -1,6 +1,6 @@
{
"name": "docnow-catalog",
- "version": "0.0.1",
+ "version": "0.0.2",
"lockfileVersion": 1,
"requires": true,
"dependencies": {
diff --git a/package.json b/package.json
index 65a409c..485ffd5 100644
--- a/package.json
+++ b/package.json
@@ -4,6 +4,9 @@
"description": "A clearinghouse for tweet datasets",
"version": "0.0.2",
"author": "Documenting the Now ",
+ "engines": {
+ "node": "12"
+ },
"dependencies": {
"@material-ui/core": "^4.11.2",
"@material-ui/styles": "^4.11.2",
diff --git a/src/components/datasets.js b/src/components/datasets.js
index ae1f94e..b30aace 100644
--- a/src/components/datasets.js
+++ b/src/components/datasets.js
@@ -257,13 +257,13 @@ function filterSearch(datasets, search) {
const pattern = new RegExp(search, 'i')
const slugs = []
for (const d of datasets) {
- if (d.title.match(pattern)) {
+ if (d.title && d.title.match(pattern)) {
slugs.push(d.slug)
- } else if (d.description.match(pattern)) {
+ } else if (d.description && d.description.match(pattern)) {
slugs.push(d.slug)
} else if (d.creators.map(c => c.name).join(' ').match(pattern)) {
slugs.push(d.slug)
- } else if (d.repository.match(pattern)) {
+ } else if (d.repository && d.repository.match(pattern)) {
slugs.push(d.slug)
} else if (d.subjects.join(' ').match(pattern)) {
slugs.push(d.slug)
diff --git a/src/datasets/aspw-twitter-dataset-2021-11-30.md b/src/datasets/aspw-twitter-dataset-2021-11-30.md
index c617150..d656f57 100644
--- a/src/datasets/aspw-twitter-dataset-2021-11-30.md
+++ b/src/datasets/aspw-twitter-dataset-2021-11-30.md
@@ -7,7 +7,7 @@ dates:
- end: '2021-11-27'
start: '2020-11-12'
published: 2021-11-30
-repository:
+repository: GitHub
subjects:
- coronavirus
- pandemia
@@ -18,7 +18,7 @@ subjects:
- church
- border crisis
- vaccinations
-title: the Social Archive of the Polish Web
+title: The Social Archive of the Polish Web
tweets: 4617353
url: https://github.com/mw0000/aspw-twitter-dataset-2021-11-30
---
diff --git a/static/data/datasets.json b/static/data/datasets.json
index dfcbe72..a2bd095 100644
--- a/static/data/datasets.json
+++ b/static/data/datasets.json
@@ -1,4 +1,37 @@
[
+ {
+ "title": "The Social Archive of the Polish Web",
+ "creators": [
+ {
+ "name": "Marcin Wilkowski",
+ "email": "aspw[at]wilkowski.org"
+ }
+ ],
+ "added": "2021-11-30T23:42:14.000Z",
+ "published": "2021-11-30T00:00:00.000Z",
+ "dates": [
+ {
+ "start": "2020-11-12",
+ "end": "2021-11-27"
+ }
+ ],
+ "repository": "GitHub",
+ "subjects": [
+ "coronavirus",
+ "pandemia",
+ "politics",
+ "media",
+ "cities",
+ "LGBT",
+ "church",
+ "border crisis",
+ "vaccinations"
+ ],
+ "tweets": 4617353,
+ "url": "https://github.com/mw0000/aspw-twitter-dataset-2021-11-30",
+ "slug": "aspw-twitter-dataset-2021-11-30",
+ "description": "
4617353 tweets IDs (4398351 unique) in Polish language covering topics like: coronavirus pandemia, politics, media, cities, LGBT, church, border crisis, vaccinations. For details, see meta.csv in every directory. All this data together with the URLs of web pages linked within that tweets can be accessed in https://github.com/mw0000/aspw-public-archive or https://aspw.pl/pakiety.
"
+ },
{
"title": "#retweetthe8th: 2018 Referendum to repeal the 8th Amendment of the Constitution of Ireland",
"creators": [
@@ -2811,7 +2844,7 @@
],
"tweets": 5655632,
"url": "http://dx.doi.org/10.7910/DVN/TQBLWZ",
- "slug": "20170907-end-of-term-2016-us-government-twitter-archive",
+ "slug": "20170907-end-of-term-2016-u-s-government-twitter-archive",
"description": "
This dataset contains the tweet ids of 5,655,632 tweets that were collected from approximately 3000 Twitter accounts affiliated with the U.S. government. They were collected between October 21, 2016 and January 21, 2017 from the Twitter API using Social Feed Manager. This dataset was created as part of the End of Term Web Archiving initiative. The lists of accounts came from the U.S. Digital Registry and by public submissions.
This dataset contains the tweet ids of 5,655,632 tweets that were collected from approximately 3000 Twitter accounts affiliated with the U.S. government. They were collected between October 21, 2016 and January 21, 2017 from the Twitter API using Social Feed Manager. This dataset was created as part of the End of Term Web Archiving initiative. The lists of accounts came from the U.S. Digital Registry and by public submissions.
This dataset contains the tweet ids of 7,275,228 tweets related to the Women's March on January 21, 2017. They were collected between December 19, 2016 and January 23, 2017 from the Twitter API using Social Feed Manager. See included README.txt for additional information.
This dataset contains the tweet ids of 7,275,228 tweets related to the Women's March on January 21, 2017. They were collected between December 19, 2016 and January 23, 2017 from the Twitter API using Social Feed Manager. See included README.txt for additional information.
8,595,589 tweet ids for aleppo tweets captured during the fall of Aleppo in December 2016. Tweets can be \"rehydrated\" with Documenting the Now's twarc (https://github.com/DocNow/twarc). twarc.py --hydrate aleppotweetids.txt > aleppo.json
This item represents a collection of 13,480,000 tweet IDs that mentioned 'ferguson' from 2014-08-10 to 2014-08-27 and 15,080,078 tweet IDs that mention \"ferguson\" between 2014-11-11 and 2014-12-08.\nThe first set includes tweets for the two week period after the shooting of Michael Brown, and the second range includes tweets around the grand jury's decision not to indict police office Darren Wilson which was announced on 2014-11-24.\nThe first set of tweets were collected by Ed Summers at the University of Maryland and the second was a collaboration between Molly Loyd, Gregory Coleman, Kimberly Lamke, Benjamin Sugar and Ed Summers.
This data set identifies 38M tweets collected for the analysis of social media messages related to the 2012 U.S. Presidential election. The data set provides tweet IDs for tweets containing the words \"obama\", \"romney\", or both (case-insensitive matching) during the period from July 1, 2012 through November 7, 2012. The paper, “Online and Social Media Data As an Imperfect Continuous Panel Survey.” PLoS ONE 11(1): e0145406 by Diaz et al. provides further description of the dataset.
This dataset contains the tweet ids of approximately 280 million tweets related to the 2016 United States presidential election. They were collected between July 13, 2016 and November 10, 2016 from the Twitter API using Social Feed Manager. These tweet ids are broken up into 12 collections. Each collection was collected either from the GET statuses/user_timeline method of the Twitter REST API or the POST statuses/filter method of the Twitter Stream API.
This data set identifies 38M tweets collected for the analysis of social media messages related to the 2012 U.S. Presidential election. The data set provides tweet IDs for tweets containing the words \"obama\", \"romney\", or both (case-insensitive matching) during the period from July 1, 2012 through November 7, 2012. The paper, “Online and Social Media Data As an Imperfect Continuous Panel Survey.” PLoS ONE 11(1): e0145406 by Diaz et al. provides further description of the dataset.
"
+ },
{
"title": "#JeSuisCharlie, #JeSuisAhmed, #JeSuisJuif, #CharlieHebdo tweets",
"creators": [
diff --git a/static/feed.xml b/static/feed.xml
index 0a1d1f2..f5936c4 100644
--- a/static/feed.xml
+++ b/static/feed.xml
@@ -1,4 +1,4 @@
-https://catalog.docnow.iohttps://catalog.docnow.io/images/docnow.pngDocumenting the Now Tweet Cataloghttps://catalog.docnow.ioRSS for NodeWed, 24 Nov 2021 17:26:57 GMTThis dataset contains the tweet ids of 2,108,782 tweets related to the referendum to repeal the 8th Amendment of the Constitution of Ireland and replace it with the Thirty-sixth Amendment of the Constitution of Ireland on May 25, 2018.
+https://catalog.docnow.iohttps://catalog.docnow.io/images/docnow.pngDocumenting the Now Tweet Cataloghttps://catalog.docnow.ioRSS for NodeTue, 05 Apr 2022 10:18:48 GMT4617353 tweets IDs (4398351 unique) in Polish language covering topics like: coronavirus pandemia, politics, media, cities, LGBT, church, border crisis, vaccinations. For details, see meta.csv in every directory. All this data together with the URLs of web pages linked within that tweets can be accessed in https://github.com/mw0000/aspw-public-archive or https://aspw.pl/pakiety.
This dataset is published at https://github.com/mw0000/aspw-twitter-dataset-2021-11-30
]]>https://catalog.docnow.io/datasets/aspw-twitter-dataset-2021-11-30/https://catalog.docnow.io/datasets/aspw-twitter-dataset-2021-11-30/This dataset contains the tweet ids of 2,108,782 tweets related to the referendum to repeal the 8th Amendment of the Constitution of Ireland and replace it with the Thirty-sixth Amendment of the Constitution of Ireland on May 25, 2018.
They were collected between March 9th, 2018 and May 30th, 2018 from the Twitter filter stream API using twarc. The set of terms that were used for the search were: #repealthe8th, #together4yes, #8thref, #savethe8th, #hometovote, #togetherforyes, #repealedthe8th, #loveboth, #voteyes, #voteno, #lovebothvoteno, #repealtheeighth.
Note that the terms changed during the course of data collection and that the searches were not exhaustive.
The Twitter API statuses/lookup method supports retrieving the complete tweet
@@ -148,8 +148,8 @@ Tweet with certain terms or hashtags: glasgow, BetterTogether, GlasgowCC, Yougov
https://twitter.com/Chatvert/status/918817455505575942.
This dataset is published at https://dx.doi.org/10.6086/D1Q66X
]]>https://catalog.docnow.io/datasets/20171014-amplifywomen/https://catalog.docnow.io/datasets/20171014-amplifywomen/This dataset includes 80,339 tweet ids collected on October 14th, 2017 that use the hashtag #WOCAffirmation. The hashtag was started by April Reign (@ReignOfApril) as a way to amplify voices of women of color and partly as a response to a Twitter boycott started in support of actress Rose McGowan, after she revealed that she was sexually assaulted by HarveyWeinstein. These tweets by April Reign show her calling for Twitter users to use the hashtag:
to https://twitter.com/ReignOfApril/status/918691938143834112,
https://twitter.com/ReignOfApril/status/918695092587601920,
-https://twitter.com/ReignOfApril/status/918696352359391232.
This dataset is published at https://dx.doi.org/10.6086/D1TW8H
]]>https://catalog.docnow.io/datasets/20171014-wocaffirmation/https://catalog.docnow.io/datasets/20171014-wocaffirmation/This dataset contains 18, 646 tweet ids documenting the March for Black Women which was held on September 30th, 2017 in Washington D.C. The dataset contains 2,925 tweet ids for tweets that included the hashtag #marchforblackwomen and 15,271 tweet ids for tweets that included the #hashtag M4BW. The march website is here: https://www.mamablack.org/march-for-black-women.
This dataset is published at https://dx.doi.org/10.6086/D14D4Z
]]>https://catalog.docnow.io/datasets/20171003-marchforblackwomen/https://catalog.docnow.io/datasets/20171003-marchforblackwomen/2017 Catalonia attacks were a terrorism action against pedestrians who were at La Rambla (Barcelona) and beach promenade of Cambrils on the afternoon and night of 17-18th August 2017. We selected #NoTincPor hashtag because it was the motto of the demonstrations during that period and was the most positive message. No one knew how the situation would go so the best option was to collect the dataset using the search API and limit by time between August 17 - 26, which was the final demonstration in Barcelona for the victims.
This dataset is published at http://www.docnow.io/catalog/datasets/F48D674F-C859-4E4F-AC83-510181DA3545.csv.gz
]]>https://catalog.docnow.io/datasets/20170926-notincpor/https://catalog.docnow.io/datasets/20170926-notincpor/This dataset contains Twitter JSON data for Tweets related to Hurricane Harvey and the subsequent flooding along the Texas gulf region. This dataset was created using the twarc (https://github.com/edsu/twarc) package that makes use of Twitter's search API. A total of 7,041,866 Tweets make up the combined dataset. See included README.txt for additional information.
This dataset is published at https://digital.library.unt.edu/ark:/67531/metadc993940/
]]>https://catalog.docnow.io/datasets/20170924-hurricane-harvey-twitter-dataset/https://catalog.docnow.io/datasets/20170924-hurricane-harvey-twitter-dataset/The hashtag #DrawingWhileBlack was started by artist, Annabelle, on September 15th, 2017 to celebrate the work of Black artists. The dataset includes 69,236 tweet ids collected 09/17/2017. Annabelle's Tumblr website can be found at http://sparklyfawn.tumblr.com/ and her Twitter profile is @sparklyfawn.
This dataset is published at https://dx.doi.org/10.6086/D1DT02
]]>https://catalog.docnow.io/datasets/20170917-drawingwhileblack/https://catalog.docnow.io/datasets/20170917-drawingwhileblack/This dataset contains the tweet ids of 39,695,156 tweets collected from the Twitter accounts of aproximately 4,500 news outlets, i.e., accounts of media organizations intended to disseminate news. The media organizations include everything from local U.S. newspapers to foreign television stations. They were collected between August 4, 2016 and July 20, 2018 from the Twitter API using Social Feed Manager. Note that not all accounts may have been collected for the entire duration and there may be tweets from before the time period. We intend to update this dataset periodically.
This dataset is published at http://dx.doi.org/10.7910/DVN/2FIFLH
]]>https://catalog.docnow.io/datasets/20170908-news-outlet-tweet-ids/https://catalog.docnow.io/datasets/20170908-news-outlet-tweet-ids/This dataset contains the tweet ids of 5,655,632 tweets that were collected from approximately 3000 Twitter accounts affiliated with the U.S. government. They were collected between October 21, 2016 and January 21, 2017 from the Twitter API using Social Feed Manager. This dataset was created as part of the End of Term Web Archiving initiative. The lists of accounts came from the U.S. Digital Registry and by public submissions.
This dataset is published at http://dx.doi.org/10.7910/DVN/TQBLWZ
]]>https://catalog.docnow.io/datasets/20170907-end-of-term-2016-us-government-twitter-archive/https://catalog.docnow.io/datasets/20170907-end-of-term-2016-us-government-twitter-archive/This dataset contains the tweet ids of 5,655,632 tweets that were collected from approximately 3000 Twitter accounts affiliated with the U.S. government. They were collected between October 21, 2016 and January 21, 2017 from the Twitter API using Social Feed Manager. This dataset was created as part of the End of Term Web Archiving initiative. The lists of accounts came from the U.S. Digital Registry and by public submissions.
This dataset is published at http://dx.doi.org/10.7910/DVN/TQBLWZ
]]>https://catalog.docnow.io/datasets/20170907-end-of-term-2016-u-s-government-twitter-archive/https://catalog.docnow.io/datasets/20170907-end-of-term-2016-u-s-government-twitter-archive/The 2017 solar eclipse occurred on August 21 and and was total for Oregon, Idaho, Wyoming, Nebraska, Kansas, Missouri, Illinois, Kentucky, Tennessee, North Carolina, Georgia, and South Carolina. This dataset includes 13,548,321 tweet identifiers for tweets that included any of the keywords solareclipse2017, solareclipse, eclipse2017, eclipseday or eclipse for the period August 17 to August 23, 2017. The hashtags were were selected after watching Twitter's streaming API for the trending hashtag #solareclipse2017 and counting the most popular co-occurring hashtags. The search API was used instead of the filter stream API because the stream was emitting notifications that many tweets were not delivered, since the volume was so high.
This dataset is published at https://archive.org/details/eclipse-tweets.csv
]]>https://catalog.docnow.io/datasets/20170901-2017-eclipse/https://catalog.docnow.io/datasets/20170901-2017-eclipse/This dataset contains Twitter JSON data for several Twitter search queries that were collected the week following the shooting of police officers in Dallas, Texas on July 7th 2017, using the twarc (https://github.com/edsu/twarc) package that makes use of Twitter's search API. See included README.txt for additional information.
This dataset is published at https://digital.library.unt.edu/ark:/67531/metadc991469/
]]>https://catalog.docnow.io/datasets/20170831-dallas-police-shooting-twitter-dataset/https://catalog.docnow.io/datasets/20170831-dallas-police-shooting-twitter-dataset/The #WITBragDay hashtag was used starting August 12, 2017 by women sharing their accomplishments in technology. Tweets matching the query WITBragDay were collected using using the POST statuses/filter method of the Twitter Stream API and the GET statuses/search Twitter REST API using Social Feed Manager. There are 34,266 ids for tweets retrieved from the filter stream and 47,621 ids for tweets retrieved using the search API. The dataset includes a list of 52,457 unique tweet ids from both APIs.
This dataset is published at http://dx.doi.org/10.7910/DVN/IRNS5Z
]]>https://catalog.docnow.io/datasets/20170824-witbragday/https://catalog.docnow.io/datasets/20170824-witbragday/This dataset contains the tweet ids of 35,596,281 tweets related to Hurricanes Irma and Harvey. They were collected during these events from the Twitter API using Social Feed Manager. These tweet ids are broken up into 2 collections. Each collection was collected using the POST statuses/filter method of the Twitter Stream API.
This dataset is published at http://dx.doi.org/10.7910/DVN/QRKIBW
]]>https://catalog.docnow.io/datasets/20171113-hurricanes-harvey-and-irma-tweet-ids/https://catalog.docnow.io/datasets/20171113-hurricanes-harvey-and-irma-tweet-ids/The Unite the Right rally (also known as the Charlottesville rally) was a protest in Charlottesville, Virginia, United States from August 11–12, 2017, to oppose the removal of a statue of Robert E. Lee in Emancipation Park, which itself was renamed from Lee Park two months earlier. Protesters included white supremacists, white nationalists, neo-Confederates, neo-Nazis, and militias. This dataset contains 200,113 tweet ids collected with the #unitetheright hashtag. Data collection was performed twice from the search API using twarc: once at 2017-08-13 11:46:05 GMT and the other at 2017-08-15 12:03:48 GMT. The second search was run to collect only up to where the first search left off. The time ranges for the tweets are from 2017-08-04 11:44:12 to 2017-08-15 16:03:30 GMT.
This dataset is published at https://archive.org/details/unitetheright-ids
]]>https://catalog.docnow.io/datasets/20170818-unitetheright/https://catalog.docnow.io/datasets/20170818-unitetheright/On Friday, August 11th, 2017 a large groups of racist white nationalists carrying torches marched on the University of Virginia campus in Charlottesville, VA as an intimidation tactic against proponents for the removal of confederate statues of Robert E. Lee. The Friday evening march was held ahead of a much larger racist white nationalist rally in the center of Charlottesville planned for Saturday, August 12th, 2017. This dataset includes 100,000 tweet ids collected using the DocNow prototype http://app.docnow.io/ and includes tweets sent from 01:13:56 - 7:11:36 EDT on August 12.
This dataset is published at https://dx.doi.org/10.6086/D10T0B
]]>https://catalog.docnow.io/datasets/20170812-charlottesville/https://catalog.docnow.io/datasets/20170812-charlottesville/This dataset contains 32,056 tweets that mention "ferguson" between August 8 and August 10, 2014. They were collected on May 7th, 2015 from the search form on Twitter's website. Some important side effects to be aware of is that the dataset does not include retweets and tweets that were deleted before May 7th, 2015.
This dataset is published at https://archive.org/details/ferguson-201408
]]>https://catalog.docnow.io/datasets/20170809-ferguson-august-8-10-2014/https://catalog.docnow.io/datasets/20170809-ferguson-august-8-10-2014/39,264 IDs for tweets related to the Charlottesville KKK rally on July 8, 2017. These tweet IDs matched a search for 'Charlottesville KKK OR #charlottesvilleKKK OR #blocKKK or #blocKKKparty'. These tweet IDs were collected with the twarc command line tool from Documenting the Now. Using twarc's hydrate command, researchers can retrieve the full content of those tweets—with additional metadata provided by Twitter's API—provided the tweets still exist.
This dataset is published at https://dataverse.lib.virginia.edu/dataset.xhtml?persistentId=doi:10.18130/V3/MSCNLT
]]>https://catalog.docnow.io/datasets/20170711-charlottesville-kkk-tweets/https://catalog.docnow.io/datasets/20170711-charlottesville-kkk-tweets/Identifiers for 25,489 tweets about the students' strike at the University of Puerto Rico. The tweets included the hashtag #HuelgaUPR or #Huelga2017 and are from April 11 to May 18, 2017. The tweets were collected using twarc. For a list of resources about the strike visit Puerto Rico Syllabus.
-Identificadores de 25,439 tuits sobre la huelga estudiantil en la Universidad de Puerto Rico. Los tuits fueron capturados utilizando twarc y cubren el periodo del 11 de abril al 18 de mayo. Para más información sobre la huelga visite Puerto Rico Syllabus.
This dataset is published at https://archive.org/details/tweet-ids_HuelgaUPR20170420-0518
]]>https://catalog.docnow.io/datasets/20170519-huelgaupr-tweets-april-20-may-18-2017/https://catalog.docnow.io/datasets/20170519-huelgaupr-tweets-april-20-may-18-2017/Identifiers for 782,509 tweets that included the hashtag #macronleaks or #macrongate that were sent between 2017-05-10 16:14:51 and 2017-05-02 07:02:05 UTC. The tweets were collected from the Twitter Search API using twarc. The data does not include the first use of the #macrongate hashtag, but it does include the first use of the #macronleaks hashtag which went viral after Wikileaks retweeted it. More about the story of the #marconleaks hashtag can be found at: http://www.newyorker.com/news/news-desk/the-far-right-american-nationalist-who-tweeted-macronleaks
This dataset is published at https://archive.org/details/MacronleaksTweets
]]>https://catalog.docnow.io/datasets/20170517-macronleaks-tweets/https://catalog.docnow.io/datasets/20170517-macronleaks-tweets/On 20 April 2017 the Australian Government announced that the Australian citizenship test would be made harder, with an increased focus on 'Australian values'. Suggestions as to what 'Australian values' might actually be soon started to be shared on Twitter using the hashtag #australianvalues. 55,698 tweet ids for #australianvales collected with #Documenting the Now's Twarc from 20 to 27 April 2017.
This dataset is published at https://doi.org/10.6084/m9.figshare.4982747
]]>https://catalog.docnow.io/datasets/20170508-australianvalues-tweets-20-27-april-2017/https://catalog.docnow.io/datasets/20170508-australianvalues-tweets-20-27-april-2017/681,668 tweet ids for #climatemarch collected with Documenting the Now's twarc from January 22-26, 2017. Tweets can be "rehydrated" with Documenting the Now's twarc (https://github.com/DocNow/twarc). twarc.py hydrate climatemarchtweetids.txt > climatemarch.json.
This dataset is published at http://dx.doi.org/10.5683/SP/KZZVZW
]]>https://catalog.docnow.io/datasets/20170503-climatemarch-tweets-april-19-may-3-2017/https://catalog.docnow.io/datasets/20170503-climatemarch-tweets-april-19-may-3-2017/This bag contains 10,159,892 tweets and retweets sent by or to Twitter user jkrowling between 2015-07-08 and 2017-03-18. The tweets were collected with Social Feed Manager (m5003).
This dataset is published at https://archive.org/details/Jk_rowlingTweets
]]>https://catalog.docnow.io/datasets/20170427-j-k-rowling-tweets-retweets/https://catalog.docnow.io/datasets/20170427-j-k-rowling-tweets-retweets/This bag contains 10,159,892 tweets and retweets sent by or to Twitter user jkrowling between 2015-07-08 and 2017-03-18. The tweets were collected with Social Feed Manager (m5003).
This dataset is published at https://archive.org/details/Jk_rowlingTweets
]]>https://catalog.docnow.io/datasets/20170427-j-k-rowling-tweetsretweets/https://catalog.docnow.io/datasets/20170427-j-k-rowling-tweetsretweets/1,276,220 tweet ids for #MarchForScience collected with Documenting the Now's twarc from January 22-26, 2017. Tweets can be "rehydrated" with Documenting the Now's twarc (https://github.com/DocNow/twarc). twarc.py hydrate MarchForScience_tweet-ids.txt > MarchForScience.json.
This dataset is published at http://dx.doi.org/10.5683/SP/7BC9V1
]]>https://catalog.docnow.io/datasets/20170426-marchforscience-tweets-april-12-26-2017/https://catalog.docnow.io/datasets/20170426-marchforscience-tweets-april-12-26-2017/The hashtag #BlackWomenAtWork began trending following Fox News host, Bill O'Reilly's sexist and racist comments about California Congresswoman's Maxine Water's hair on March 28th, 2017 and White House Press Secretary, Sean Spicer's remarks to journalist, April Ryan during press briefing on the same day. The hashtag began trending after Brittany Packnett used it in a set of tweets where she asked black women to share their experiences about being black women at work. These tweet ids were collected on four separate occasions using the DocNow prototype twitter collection tool. bwaw1 (10,000 tweets), bwaw2 (41,256 tweets), bwaw3 (92,756 tweets) were collected on March 28th, the day the hashtag began trending. bwaw4 (140,000 tweets) was collected on March 29th.
This dataset is published at https://dx.doi.org/10.6086/D1PP4P
]]>https://catalog.docnow.io/datasets/20170329-blackwomenatwork/https://catalog.docnow.io/datasets/20170329-blackwomenatwork/This bag contains 2,711,011 tweets identifiers collected from the Twitter filter stream between 2017-02-09 and 2017-03-18 that used one or more of the following hashtags: alternativefacts, fakenews, truthiness, postfact, posttruth, factcheck. The original tweets were collected using twarc.
This dataset is published at https://archive.org/details/fakenews-tweets
]]>https://catalog.docnow.io/datasets/20170328-fake-news-tweets/https://catalog.docnow.io/datasets/20170328-fake-news-tweets/This dataset contains the tweet ids of 7,275,228 tweets related to the Women's March on January 21, 2017. They were collected between December 19, 2016 and January 23, 2017 from the Twitter API using Social Feed Manager. See included README.txt for additional information.
This dataset is published at https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi:10.7910/DVN/5ZVMOR
]]>https://catalog.docnow.io/datasets/20170203-womens-march-tweet-ids/https://catalog.docnow.io/datasets/20170203-womens-march-tweet-ids/This dataset contains the tweet ids of 7,275,228 tweets related to the Women's March on January 21, 2017. They were collected between December 19, 2016 and January 23, 2017 from the Twitter API using Social Feed Manager. See included README.txt for additional information.
This dataset is published at https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi:10.7910/DVN/5ZVMOR
]]>https://catalog.docnow.io/datasets/20170203-women-s-march-tweet-ids/https://catalog.docnow.io/datasets/20170203-women-s-march-tweet-ids/brexit tweets collected from the 5th of May to the 24th August 2016.
This dataset is published at https://zenodo.org/record/263584
]]>https://catalog.docnow.io/datasets/20170129-brexit-summer-2016/https://catalog.docnow.io/datasets/20170129-brexit-summer-2016/14,478,518 tweet ids for #WomensMarch collected with Documenting the Now's twarc from January 21-28, 2017. Tweets can be "rehydrated" with Documenting the Now's twarc (https://github.com/DocNow/twarc). twarc.py –hydrate WomensMarchtweetids.txt > WomensMarch.json Also included are the logs files for the Filter API and Search API queries. The Filter API query captures the cumulative number of dropped tweets.
This dataset is published at http://dx.doi.org/10.5683/SP/ZEL1Q6
]]>https://catalog.docnow.io/datasets/20170129-womensmarch-tweets-january-12-28-2017/https://catalog.docnow.io/datasets/20170129-womensmarch-tweets-january-12-28-2017/These 136,990 tweet ids represent reaction to a Facebook Live video that was posted on January 3rd, 2017, showing four African American men violently attacking a white, mentally disabled man. The tweets were collected on 01/05/2017. After the video surfaced, the Twitter hashtag, #BLMkidnapping, was created and used to incorrectly attribute the violent attack to members of the Black Lives Matter movement. Police in Chicago, where the attack took place, have found no evidence the attack has any connection to the Black Lives Matter movement. This link is to a CNN story documenting the police denial of Black Lives Matter connection: http://www.cnn.com/2017/01/05/us/black-lives-matter-chicago-facebook-live-beating/index.html
This dataset is published at https://dx.doi.org/10.6086/D1CC7X
]]>https://catalog.docnow.io/datasets/20170115-blmkidnapping/https://catalog.docnow.io/datasets/20170115-blmkidnapping/On January 12th, 2017 the Senate voted 51-48 to approve a budget resolution as the first step in repealing the Affordable Care Act. The hashtag #SaveACA began being used heavily on Twitter the same day as a response. This dataset includes tweet ids collected on four separate occasions on January 12th and 13th, 2017 for the hashtag #SaveACA
This dataset is published at https://dx.doi.org/10.6086/D17P4N
]]>https://catalog.docnow.io/datasets/20170115-saveaca/https://catalog.docnow.io/datasets/20170115-saveaca/An ongoing collection of Tweets collected by NCSU Libraries using twarc for the key terms "HB2", "WeAreNotThis", and "BoycottNC", "KeepNCFair", and "ThisIsNotUs". "WeAreNotThis", "BoycottNC", "ThisIsNotUs", and "North Carolina" beginning on 2016-03-24, and "HB2" beginning on 2016-12-25. Only Tweets including "HB2", "bathroom", "bill", or "KeepNCFair" are included from the "North Carolina" set. These tags were used to discuss North Carolina House Bill 2 (The Public Facilities Privacy & Security Act), passed in March 2016, which includes provisions (among others) that disallow local municipalities from passing their own anti-discrimination ordinances and also require individuals, when using use public bathrooms, to use those that align with their sex as stated on their birth certificates rather than the restroom that is consistent with their gender identity (see: https://en.wikipedia.org/wiki/Public_Facilities_Privacy_%26_Security_Act). This dataset is broken into files of no more than 50,000 Tweet IDs each.
This dataset is published at https://github.com/NCSU-Libraries/HB2-Twitter-data
]]>https://catalog.docnow.io/datasets/20170112-north-carolina-house-bill-2-tweet-ids/https://catalog.docnow.io/datasets/20170112-north-carolina-house-bill-2-tweet-ids/A list of 10,538 Twitter IDs for tweets harvested between 4 January at 11am and 9 January at 11am using Social Feed Manager. As this used the search API, the 4 January at 11am crawl went back about 5-9 days. Tweet IDs included, as is a log of the decisions made to curate this dataset.
This dataset is published at http://dx.doi.org/10.5683/SP/CFVF1F
]]>https://catalog.docnow.io/datasets/20170109-american-historical-association-2017-conference-tweets/https://catalog.docnow.io/datasets/20170109-american-historical-association-2017-conference-tweets/A list of 24876 Twitter IDs for tweets harvested between Nov. 28 and Dec. 6 2014 containing the hashtag #bill10. Bill 10 in the Alberta legislature would have given public and Catholic school boards the right to refuse student requests to form gay-straight alliances in schools. Under intense public interest it was withdrawn by the Conservative government.
This dataset is published at http://dx.doi.org/10.7939/DVN/10971
]]>https://catalog.docnow.io/datasets/20170107-bill-10-twitter-ids/https://catalog.docnow.io/datasets/20170107-bill-10-twitter-ids/This is a dataset of ids for tweets purchased from Twitter as part of the Beyond the Hashtags study http://cmsimpact.org/resource/beyond-hashtags-ferguson-blacklivesmatter-online-struggle-offline-justice/ The dataset includes a year of tweets that mention one or more of 45 keywords associated with the BlackLivesMatter movement. This period covers a critical time in which social media was used to raise awareness about police killings of unarmed Black citizens in the United States.
This dataset is published at http://dfreelon.org/2017/01/03/beyond-the-hashtags-twitter-data/
]]>https://catalog.docnow.io/datasets/20170104-beyond-the-hashtags-ferguson-blacklivesmatter-and-the-online-struggle-for-offline-justice/https://catalog.docnow.io/datasets/20170104-beyond-the-hashtags-ferguson-blacklivesmatter-and-the-online-struggle-for-offline-justice/228,086 tweet ids for "TheHip, hipinkingston" captured during the Tragically Hip's final concert in Kingston, Ontario in August 2016. Tweets can be "rehydrated" with Documenting the Now's twarc (https://github.com/DocNow/twarc). twarc.py --hydrate thfinalconcertkingstontweetids.txt > thfinalconcertkingston.json
This dataset is published at http://dx.doi.org/10.5683/SP/NGMQNX
]]>https://catalog.docnow.io/datasets/20161231-tweet-ids-for-final-tragically-hip-concert/https://catalog.docnow.io/datasets/20161231-tweet-ids-for-final-tragically-hip-concert/These are tweets that were collected between August 27, 2015 and January 4, 2016 that mention the word "trump". This period marked important early months in the Republican primaries. They were collected from Twitter's streaming API using twarc.
-There are 40,202,199 tweet identifiers in all. Due to network outages there are gaps at the following points: 2015-08-27 19:12:37 - 2015-08-27 20:13:44 ; 2015-11-02 02:02:13 - 2015-11-05 16:20:35 ; 2015-12-28 02:02:42 - 2015-12-28 02:04:00
This dataset is published at https://archive.org/details/trump-tweet-ids
]]>https://catalog.docnow.io/datasets/20161230-trump-early-primary-tweets/https://catalog.docnow.io/datasets/20161230-trump-early-primary-tweets/8,595,589 tweet ids for aleppo tweets captured during the fall of Aleppo in December 2016. Tweets can be "rehydrated" with Documenting the Now's twarc (https://github.com/DocNow/twarc). twarc.py --hydrate aleppotweetids.txt > aleppo.json
This dataset is published at http://dx.doi.org/10.5683/SP/DJLHSB
]]>https://catalog.docnow.io/datasets/20161230-the-fall-of-aleppo-tweets-aleppo-2016-12-13-through-2016-12-29/https://catalog.docnow.io/datasets/20161230-the-fall-of-aleppo-tweets-aleppo-2016-12-13-through-2016-12-29/Tweet ids for #elxn42 tweets.
This dataset is published at http://hdl.handle.net/10864/11270
]]>https://catalog.docnow.io/datasets/20161224-elxn42-tweets-42nd-canadian-federal-election/https://catalog.docnow.io/datasets/20161224-elxn42-tweets-42nd-canadian-federal-election/This item represents a collection of 13,480,000 tweet IDs that mentioned 'ferguson' from 2014-08-10 to 2014-08-27 and 15,080,078 tweet IDs that mention "ferguson" between 2014-11-11 and 2014-12-08.
+https://twitter.com/ReignOfApril/status/918696352359391232.
This dataset is published at https://dx.doi.org/10.6086/D1TW8H
]]>https://catalog.docnow.io/datasets/20171014-wocaffirmation/https://catalog.docnow.io/datasets/20171014-wocaffirmation/This dataset contains 18,646 tweet ids documenting the March for Black Women which was held on September 30th, 2017 in Washington D.C. The dataset contains 2,925 tweet ids for tweets that included the hashtag #marchforblackwomen and 15,271 tweet ids for tweets that included the hashtag #M4BW. The march website is here: https://www.mamablack.org/march-for-black-women.
This dataset is published at https://dx.doi.org/10.6086/D14D4Z
]]>https://catalog.docnow.io/datasets/20171003-marchforblackwomen/https://catalog.docnow.io/datasets/20171003-marchforblackwomen/2017 Catalonia attacks were a terrorism action against pedestrians who were at La Rambla (Barcelona) and beach promenade of Cambrils on the afternoon and night of 17-18th August 2017. We selected #NoTincPor hashtag because it was the motto of the demonstrations during that period and was the most positive message. No one knew how the situation would go so the best option was to collect the dataset using the search API and limit by time between August 17 - 26, which was the final demonstration in Barcelona for the victims.
This dataset is published at http://www.docnow.io/catalog/datasets/F48D674F-C859-4E4F-AC83-510181DA3545.csv.gz
]]>https://catalog.docnow.io/datasets/20170926-notincpor/https://catalog.docnow.io/datasets/20170926-notincpor/This dataset contains Twitter JSON data for Tweets related to Hurricane Harvey and the subsequent flooding along the Texas gulf region. This dataset was created using the twarc (https://github.com/edsu/twarc) package that makes use of Twitter's search API. A total of 7,041,866 Tweets make up the combined dataset. See included README.txt for additional information.
This dataset is published at https://digital.library.unt.edu/ark:/67531/metadc993940/
]]>https://catalog.docnow.io/datasets/20170924-hurricane-harvey-twitter-dataset/https://catalog.docnow.io/datasets/20170924-hurricane-harvey-twitter-dataset/The hashtag #DrawingWhileBlack was started by artist, Annabelle, on September 15th, 2017 to celebrate the work of Black artists. The dataset includes 69,236 tweet ids collected 09/17/2017. Annabelle's Tumblr website can be found at http://sparklyfawn.tumblr.com/ and her Twitter profile is @sparklyfawn.
This dataset is published at https://dx.doi.org/10.6086/D1DT02
]]>https://catalog.docnow.io/datasets/20170917-drawingwhileblack/https://catalog.docnow.io/datasets/20170917-drawingwhileblack/This dataset contains the tweet ids of 39,695,156 tweets collected from the Twitter accounts of approximately 4,500 news outlets, i.e., accounts of media organizations intended to disseminate news. The media organizations include everything from local U.S. newspapers to foreign television stations. They were collected between August 4, 2016 and July 20, 2018 from the Twitter API using Social Feed Manager. Note that not all accounts may have been collected for the entire duration and there may be tweets from before the time period. We intend to update this dataset periodically.
This dataset is published at http://dx.doi.org/10.7910/DVN/2FIFLH
]]>https://catalog.docnow.io/datasets/20170908-news-outlet-tweet-ids/https://catalog.docnow.io/datasets/20170908-news-outlet-tweet-ids/This dataset contains the tweet ids of 5,655,632 tweets that were collected from approximately 3000 Twitter accounts affiliated with the U.S. government. They were collected between October 21, 2016 and January 21, 2017 from the Twitter API using Social Feed Manager. This dataset was created as part of the End of Term Web Archiving initiative. The lists of accounts came from the U.S. Digital Registry and by public submissions.
This dataset is published at http://dx.doi.org/10.7910/DVN/TQBLWZ
]]>https://catalog.docnow.io/datasets/20170907-end-of-term-2016-u-s-government-twitter-archive/https://catalog.docnow.io/datasets/20170907-end-of-term-2016-u-s-government-twitter-archive/This dataset contains the tweet ids of 5,655,632 tweets that were collected from approximately 3000 Twitter accounts affiliated with the U.S. government. They were collected between October 21, 2016 and January 21, 2017 from the Twitter API using Social Feed Manager. This dataset was created as part of the End of Term Web Archiving initiative. The lists of accounts came from the U.S. Digital Registry and by public submissions.
This dataset is published at http://dx.doi.org/10.7910/DVN/TQBLWZ
]]>https://catalog.docnow.io/datasets/20170907-end-of-term-2016-us-government-twitter-archive/https://catalog.docnow.io/datasets/20170907-end-of-term-2016-us-government-twitter-archive/The 2017 solar eclipse occurred on August 21 and was total for Oregon, Idaho, Wyoming, Nebraska, Kansas, Missouri, Illinois, Kentucky, Tennessee, North Carolina, Georgia, and South Carolina. This dataset includes 13,548,321 tweet identifiers for tweets that included any of the keywords solareclipse2017, solareclipse, eclipse2017, eclipseday or eclipse for the period August 17 to August 23, 2017. The hashtags were selected after watching Twitter's streaming API for the trending hashtag #solareclipse2017 and counting the most popular co-occurring hashtags. The search API was used instead of the filter stream API because the stream was emitting notifications that many tweets were not delivered, since the volume was so high.
This dataset is published at https://archive.org/details/eclipse-tweets.csv
]]>https://catalog.docnow.io/datasets/20170901-2017-eclipse/https://catalog.docnow.io/datasets/20170901-2017-eclipse/This dataset contains Twitter JSON data for several Twitter search queries that were collected the week following the shooting of police officers in Dallas, Texas on July 7th 2017, using the twarc (https://github.com/edsu/twarc) package that makes use of Twitter's search API. See included README.txt for additional information.
This dataset is published at https://digital.library.unt.edu/ark:/67531/metadc991469/
]]>https://catalog.docnow.io/datasets/20170831-dallas-police-shooting-twitter-dataset/https://catalog.docnow.io/datasets/20170831-dallas-police-shooting-twitter-dataset/The #WITBragDay hashtag was used starting August 12, 2017 by women sharing their accomplishments in technology. Tweets matching the query WITBragDay were collected using the POST statuses/filter method of the Twitter Stream API and the GET statuses/search Twitter REST API using Social Feed Manager. There are 34,266 ids for tweets retrieved from the filter stream and 47,621 ids for tweets retrieved using the search API. The dataset includes a list of 52,457 unique tweet ids from both APIs.
This dataset is published at http://dx.doi.org/10.7910/DVN/IRNS5Z
]]>https://catalog.docnow.io/datasets/20170824-witbragday/https://catalog.docnow.io/datasets/20170824-witbragday/This dataset contains the tweet ids of 35,596,281 tweets related to Hurricanes Irma and Harvey. They were collected during these events from the Twitter API using Social Feed Manager. These tweet ids are broken up into 2 collections. Each collection was collected using the POST statuses/filter method of the Twitter Stream API.
This dataset is published at http://dx.doi.org/10.7910/DVN/QRKIBW
]]>https://catalog.docnow.io/datasets/20171113-hurricanes-harvey-and-irma-tweet-ids/https://catalog.docnow.io/datasets/20171113-hurricanes-harvey-and-irma-tweet-ids/The Unite the Right rally (also known as the Charlottesville rally) was a protest in Charlottesville, Virginia, United States from August 11–12, 2017, to oppose the removal of a statue of Robert E. Lee in Emancipation Park, which itself was renamed from Lee Park two months earlier. Protesters included white supremacists, white nationalists, neo-Confederates, neo-Nazis, and militias. This dataset contains 200,113 tweet ids collected with the #unitetheright hashtag. Data collection was performed twice from the search API using twarc: once at 2017-08-13 11:46:05 GMT and the other at 2017-08-15 12:03:48 GMT. The second search was run to collect only up to where the first search left off. The time ranges for the tweets are from 2017-08-04 11:44:12 to 2017-08-15 16:03:30 GMT.
This dataset is published at https://archive.org/details/unitetheright-ids
]]>https://catalog.docnow.io/datasets/20170818-unitetheright/https://catalog.docnow.io/datasets/20170818-unitetheright/On Friday, August 11th, 2017 a large group of racist white nationalists carrying torches marched on the University of Virginia campus in Charlottesville, VA as an intimidation tactic against proponents for the removal of confederate statues of Robert E. Lee. The Friday evening march was held ahead of a much larger racist white nationalist rally in the center of Charlottesville planned for Saturday, August 12th, 2017. This dataset includes 100,000 tweet ids collected using the DocNow prototype http://app.docnow.io/ and includes tweets sent from 01:13:56 - 7:11:36 EDT on August 12.
This dataset is published at https://dx.doi.org/10.6086/D10T0B
]]>https://catalog.docnow.io/datasets/20170812-charlottesville/https://catalog.docnow.io/datasets/20170812-charlottesville/This dataset contains 32,056 tweets that mention "ferguson" between August 8 and August 10, 2014. They were collected on May 7th, 2015 from the search form on Twitter's website. One important side effect to be aware of is that the dataset does not include retweets and tweets that were deleted before May 7th, 2015.
This dataset is published at https://archive.org/details/ferguson-201408
]]>https://catalog.docnow.io/datasets/20170809-ferguson-august-8-10-2014/https://catalog.docnow.io/datasets/20170809-ferguson-august-8-10-2014/39,264 IDs for tweets related to the Charlottesville KKK rally on July 8, 2017. These tweet IDs matched a search for 'Charlottesville KKK OR #charlottesvilleKKK OR #blocKKK or #blocKKKparty'. These tweet IDs were collected with the twarc command line tool from Documenting the Now. Using twarc's hydrate command, researchers can retrieve the full content of those tweets—with additional metadata provided by Twitter's API—provided the tweets still exist.
This dataset is published at https://dataverse.lib.virginia.edu/dataset.xhtml?persistentId=doi:10.18130/V3/MSCNLT
]]>https://catalog.docnow.io/datasets/20170711-charlottesville-kkk-tweets/https://catalog.docnow.io/datasets/20170711-charlottesville-kkk-tweets/Identifiers for 25,489 tweets about the students' strike at the University of Puerto Rico. The tweets included the hashtag #HuelgaUPR or #Huelga2017 and are from April 11 to May 18, 2017. The tweets were collected using twarc. For a list of resources about the strike visit Puerto Rico Syllabus.
+Identificadores de 25,439 tuits sobre la huelga estudiantil en la Universidad de Puerto Rico. Los tuits fueron capturados utilizando twarc y cubren el periodo del 11 de abril al 18 de mayo. Para más información sobre la huelga visite Puerto Rico Syllabus.
This dataset is published at https://archive.org/details/tweet-ids_HuelgaUPR20170420-0518
]]>https://catalog.docnow.io/datasets/20170519-huelgaupr-tweets-april-20-may-18-2017/https://catalog.docnow.io/datasets/20170519-huelgaupr-tweets-april-20-may-18-2017/Identifiers for 782,509 tweets that included the hashtag #macronleaks or #macrongate that were sent between 2017-05-10 16:14:51 and 2017-05-02 07:02:05 UTC. The tweets were collected from the Twitter Search API using twarc. The data does not include the first use of the #macrongate hashtag, but it does include the first use of the #macronleaks hashtag which went viral after Wikileaks retweeted it. More about the story of the #macronleaks hashtag can be found at: http://www.newyorker.com/news/news-desk/the-far-right-american-nationalist-who-tweeted-macronleaks
This dataset is published at https://archive.org/details/MacronleaksTweets
]]>https://catalog.docnow.io/datasets/20170517-macronleaks-tweets/https://catalog.docnow.io/datasets/20170517-macronleaks-tweets/On 20 April 2017 the Australian Government announced that the Australian citizenship test would be made harder, with an increased focus on 'Australian values'. Suggestions as to what 'Australian values' might actually be soon started to be shared on Twitter using the hashtag #australianvalues. 55,698 tweet ids for #australianvalues collected with Documenting the Now's twarc from 20 to 27 April 2017.
This dataset is published at https://doi.org/10.6084/m9.figshare.4982747
]]>https://catalog.docnow.io/datasets/20170508-australianvalues-tweets-20-27-april-2017/https://catalog.docnow.io/datasets/20170508-australianvalues-tweets-20-27-april-2017/681,668 tweet ids for #climatemarch collected with Documenting the Now's twarc from January 22-26, 2017. Tweets can be "rehydrated" with Documenting the Now's twarc (https://github.com/DocNow/twarc). twarc.py hydrate climatemarchtweetids.txt > climatemarch.json.
This dataset is published at http://dx.doi.org/10.5683/SP/KZZVZW
]]>https://catalog.docnow.io/datasets/20170503-climatemarch-tweets-april-19-may-3-2017/https://catalog.docnow.io/datasets/20170503-climatemarch-tweets-april-19-may-3-2017/This bag contains 10,159,892 tweets and retweets sent by or to Twitter user jkrowling between 2015-07-08 and 2017-03-18. The tweets were collected with Social Feed Manager (m5003).
This dataset is published at https://archive.org/details/Jk_rowlingTweets
]]>https://catalog.docnow.io/datasets/20170427-j-k-rowling-tweets-retweets/https://catalog.docnow.io/datasets/20170427-j-k-rowling-tweets-retweets/This bag contains 10,159,892 tweets and retweets sent by or to Twitter user jkrowling between 2015-07-08 and 2017-03-18. The tweets were collected with Social Feed Manager (m5003).
This dataset is published at https://archive.org/details/Jk_rowlingTweets
]]>https://catalog.docnow.io/datasets/20170427-j-k-rowling-tweetsretweets/https://catalog.docnow.io/datasets/20170427-j-k-rowling-tweetsretweets/1,276,220 tweet ids for #MarchForScience collected with Documenting the Now's twarc from January 22-26, 2017. Tweets can be "rehydrated" with Documenting the Now's twarc (https://github.com/DocNow/twarc). twarc.py hydrate MarchForScience_tweet-ids.txt > MarchForScience.json.
This dataset is published at http://dx.doi.org/10.5683/SP/7BC9V1
]]>https://catalog.docnow.io/datasets/20170426-marchforscience-tweets-april-12-26-2017/https://catalog.docnow.io/datasets/20170426-marchforscience-tweets-april-12-26-2017/The hashtag #BlackWomenAtWork began trending following Fox News host, Bill O'Reilly's sexist and racist comments about California Congresswoman Maxine Waters's hair on March 28th, 2017 and White House Press Secretary, Sean Spicer's remarks to journalist, April Ryan during a press briefing on the same day. The hashtag began trending after Brittany Packnett used it in a set of tweets where she asked black women to share their experiences about being black women at work. These tweet ids were collected on four separate occasions using the DocNow prototype twitter collection tool. bwaw1 (10,000 tweets), bwaw2 (41,256 tweets), bwaw3 (92,756 tweets) were collected on March 28th, the day the hashtag began trending. bwaw4 (140,000 tweets) was collected on March 29th.
This dataset is published at https://dx.doi.org/10.6086/D1PP4P
]]>https://catalog.docnow.io/datasets/20170329-blackwomenatwork/https://catalog.docnow.io/datasets/20170329-blackwomenatwork/This bag contains 2,711,011 tweet identifiers collected from the Twitter filter stream between 2017-02-09 and 2017-03-18 that used one or more of the following hashtags: alternativefacts, fakenews, truthiness, postfact, posttruth, factcheck. The original tweets were collected using twarc.
This dataset is published at https://archive.org/details/fakenews-tweets
]]>https://catalog.docnow.io/datasets/20170328-fake-news-tweets/https://catalog.docnow.io/datasets/20170328-fake-news-tweets/This dataset contains the tweet ids of 7,275,228 tweets related to the Women's March on January 21, 2017. They were collected between December 19, 2016 and January 23, 2017 from the Twitter API using Social Feed Manager. See included README.txt for additional information.
This dataset is published at https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi:10.7910/DVN/5ZVMOR
]]>https://catalog.docnow.io/datasets/20170203-women-s-march-tweet-ids/https://catalog.docnow.io/datasets/20170203-women-s-march-tweet-ids/This dataset contains the tweet ids of 7,275,228 tweets related to the Women's March on January 21, 2017. They were collected between December 19, 2016 and January 23, 2017 from the Twitter API using Social Feed Manager. See included README.txt for additional information.
This dataset is published at https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi:10.7910/DVN/5ZVMOR
]]>https://catalog.docnow.io/datasets/20170203-womens-march-tweet-ids/https://catalog.docnow.io/datasets/20170203-womens-march-tweet-ids/brexit tweets collected from the 5th of May to the 24th August 2016.
This dataset is published at https://zenodo.org/record/263584
]]>https://catalog.docnow.io/datasets/20170129-brexit-summer-2016/https://catalog.docnow.io/datasets/20170129-brexit-summer-2016/14,478,518 tweet ids for #WomensMarch collected with Documenting the Now's twarc from January 21-28, 2017. Tweets can be "rehydrated" with Documenting the Now's twarc (https://github.com/DocNow/twarc). twarc.py --hydrate WomensMarchtweetids.txt > WomensMarch.json Also included are the logs files for the Filter API and Search API queries. The Filter API query captures the cumulative number of dropped tweets.
This dataset is published at http://dx.doi.org/10.5683/SP/ZEL1Q6
]]>https://catalog.docnow.io/datasets/20170129-womensmarch-tweets-january-12-28-2017/https://catalog.docnow.io/datasets/20170129-womensmarch-tweets-january-12-28-2017/These 136,990 tweet ids represent reaction to a Facebook Live video that was posted on January 3rd, 2017, showing four African American men violently attacking a white, mentally disabled man. The tweets were collected on 01/05/2017. After the video surfaced, the Twitter hashtag, #BLMkidnapping, was created and used to incorrectly attribute the violent attack to members of the Black Lives Matter movement. Police in Chicago, where the attack took place, have found no evidence the attack has any connection to the Black Lives Matter movement. This link is to a CNN story documenting the police denial of Black Lives Matter connection: http://www.cnn.com/2017/01/05/us/black-lives-matter-chicago-facebook-live-beating/index.html
This dataset is published at https://dx.doi.org/10.6086/D1CC7X
]]>https://catalog.docnow.io/datasets/20170115-blmkidnapping/https://catalog.docnow.io/datasets/20170115-blmkidnapping/On January 12th, 2017 the Senate voted 51-48 to approve a budget resolution as the first step in repealing the Affordable Care Act. The hashtag #SaveACA began being used heavily on Twitter the same day as a response. This dataset includes tweet ids collected on four separate occasions on January 12th and 13th, 2017 for the hashtag #SaveACA
This dataset is published at https://dx.doi.org/10.6086/D17P4N
]]>https://catalog.docnow.io/datasets/20170115-saveaca/https://catalog.docnow.io/datasets/20170115-saveaca/An ongoing collection of Tweets collected by NCSU Libraries using twarc for the key terms "HB2", "WeAreNotThis", and "BoycottNC", "KeepNCFair", and "ThisIsNotUs". "WeAreNotThis", "BoycottNC", "ThisIsNotUs", and "North Carolina" beginning on 2016-03-24, and "HB2" beginning on 2016-12-25. Only Tweets including "HB2", "bathroom", "bill", or "KeepNCFair" are included from the "North Carolina" set. These tags were used to discuss North Carolina House Bill 2 (The Public Facilities Privacy & Security Act), passed in March 2016, which includes provisions (among others) that disallow local municipalities from passing their own anti-discrimination ordinances and also require individuals, when using public bathrooms, to use those that align with their sex as stated on their birth certificates rather than the restroom that is consistent with their gender identity (see: https://en.wikipedia.org/wiki/Public_Facilities_Privacy_%26_Security_Act). This dataset is broken into files of no more than 50,000 Tweet IDs each.
This dataset is published at https://github.com/NCSU-Libraries/HB2-Twitter-data
]]>https://catalog.docnow.io/datasets/20170112-north-carolina-house-bill-2-tweet-ids/https://catalog.docnow.io/datasets/20170112-north-carolina-house-bill-2-tweet-ids/A list of 10,538 Twitter IDs for tweets harvested between 4 January at 11am and 9 January at 11am using Social Feed Manager. As this used the search API, the 4 January at 11am crawl went back about 5-9 days. Tweet IDs included, as is a log of the decisions made to curate this dataset.
This dataset is published at http://dx.doi.org/10.5683/SP/CFVF1F
]]>https://catalog.docnow.io/datasets/20170109-american-historical-association-2017-conference-tweets/https://catalog.docnow.io/datasets/20170109-american-historical-association-2017-conference-tweets/A list of 24876 Twitter IDs for tweets harvested between Nov. 28 and Dec. 6 2014 containing the hashtag #bill10. Bill 10 in the Alberta legislature would have given public and Catholic school boards the right to refuse student requests to form gay-straight alliances in schools. Under intense public interest it was withdrawn by the Conservative government.
This dataset is published at http://dx.doi.org/10.7939/DVN/10971
]]>https://catalog.docnow.io/datasets/20170107-bill-10-twitter-ids/https://catalog.docnow.io/datasets/20170107-bill-10-twitter-ids/This is a dataset of ids for tweets purchased from Twitter as part of the Beyond the Hashtags study http://cmsimpact.org/resource/beyond-hashtags-ferguson-blacklivesmatter-online-struggle-offline-justice/ The dataset includes a year of tweets that mention one or more of 45 keywords associated with the BlackLivesMatter movement. This period covers a critical time in which social media was used to raise awareness about police killings of unarmed Black citizens in the United States.
This dataset is published at http://dfreelon.org/2017/01/03/beyond-the-hashtags-twitter-data/
]]>https://catalog.docnow.io/datasets/20170104-beyond-the-hashtags-ferguson-blacklivesmatter-and-the-online-struggle-for-offline-justice/https://catalog.docnow.io/datasets/20170104-beyond-the-hashtags-ferguson-blacklivesmatter-and-the-online-struggle-for-offline-justice/228,086 tweet ids for "TheHip, hipinkingston" captured during the Tragically Hip's final concert in Kingston, Ontario in August 2016. Tweets can be "rehydrated" with Documenting the Now's twarc (https://github.com/DocNow/twarc). twarc.py --hydrate thfinalconcertkingstontweetids.txt > thfinalconcertkingston.json
This dataset is published at http://dx.doi.org/10.5683/SP/NGMQNX
]]>https://catalog.docnow.io/datasets/20161231-tweet-ids-for-final-tragically-hip-concert/https://catalog.docnow.io/datasets/20161231-tweet-ids-for-final-tragically-hip-concert/These are tweets that were collected between August 27, 2015 and January 4, 2016 that mention the word "trump". This period marked important early months in the Republican primaries. They were collected from Twitter's streaming API using twarc.
+There are 40,202,199 tweet identifiers in all. Due to network outages there are gaps at the following points: 2015-08-27 19:12:37 - 2015-08-27 20:13:44 ; 2015-11-02 02:02:13 - 2015-11-05 16:20:35 ; 2015-12-28 02:02:42 - 2015-12-28 02:04:00
This dataset is published at https://archive.org/details/trump-tweet-ids
]]>https://catalog.docnow.io/datasets/20161230-trump-early-primary-tweets/https://catalog.docnow.io/datasets/20161230-trump-early-primary-tweets/8,595,589 tweet ids for aleppo tweets captured during the fall of Aleppo in December 2016. Tweets can be "rehydrated" with Documenting the Now's twarc (https://github.com/DocNow/twarc). twarc.py --hydrate aleppotweetids.txt > aleppo.json
This dataset is published at http://dx.doi.org/10.5683/SP/DJLHSB
]]>https://catalog.docnow.io/datasets/20161230-the-fall-of-aleppo-tweets-aleppo-2016-12-13-through-2016-12-29/https://catalog.docnow.io/datasets/20161230-the-fall-of-aleppo-tweets-aleppo-2016-12-13-through-2016-12-29/This item represents a collection of 13,480,000 tweet IDs that mentioned 'ferguson' from 2014-08-10 to 2014-08-27 and 15,080,078 tweet IDs that mention "ferguson" between 2014-11-11 and 2014-12-08.
The first set includes tweets for the two week period after the shooting of Michael Brown, and the second range includes tweets around the grand jury's decision not to indict police officer Darren Wilson which was announced on 2014-11-24.
-The first set of tweets were collected by Ed Summers at the University of Maryland and the second was a collaboration between Molly Loyd, Gregory Coleman, Kimberly Lamke, Benjamin Sugar and Ed Summers.
This dataset is published at https://archive.org/details/ferguson-tweet-ids
]]>https://catalog.docnow.io/datasets/20161224-ferguson-tweets/https://catalog.docnow.io/datasets/20161224-ferguson-tweets/This dataset contains Twitter JSON data for several Twitter search queries that were collected around the #YesAllWomen Twitter "conversation" between May 25, 2014 and June 8, 2014 using the twarc (https://github.com/edsu/twarc) package that makes use of Twitter's search API. A total of 2,805,763 Tweets and 34,532 images make up the combined dataset.
This dataset is published at http://digital.library.unt.edu/ark:/67531/metadc304853/
]]>https://catalog.docnow.io/datasets/20161224-yes-all-women-twitter-dataset/https://catalog.docnow.io/datasets/20161224-yes-all-women-twitter-dataset/Tweet ids for #MakeDonaldDrumpfAgain tweets.
This dataset is published at http://hdl.handle.net/10864/11491
]]>https://catalog.docnow.io/datasets/20161223-makedonalddrumpfagain-tweets/https://catalog.docnow.io/datasets/20161223-makedonalddrumpfagain-tweets/Tweet ids for #NDP2016 tweets during the 2016 NDP Convention.
This dataset is published at http://hdl.handle.net/10864/11674
]]>https://catalog.docnow.io/datasets/20161223-ndp2016-tweets/https://catalog.docnow.io/datasets/20161223-ndp2016-tweets/Tweet ids for #panamapapers tweets.
This dataset is published at http://hdl.handle.net/10864/11592
]]>https://catalog.docnow.io/datasets/20161223-panamapapers-tweets/https://catalog.docnow.io/datasets/20161223-panamapapers-tweets/Tweet ids for #thechalkening tweets.
This dataset is published at http://hdl.handle.net/10864/11591
]]>https://catalog.docnow.io/datasets/20161223-thechalkening-tweets/https://catalog.docnow.io/datasets/20161223-thechalkening-tweets/Tweet ids for #paris #Bataclan #parisattacks #porteouverte tweets.
This dataset is published at http://hdl.handle.net/10864/11312
]]>https://catalog.docnow.io/datasets/20161223-paris-bataclan-parisattacks-porteouverte-tweets/https://catalog.docnow.io/datasets/20161223-paris-bataclan-parisattacks-porteouverte-tweets/Tweet ids for #YMMfire tweets captured during the 2016 Fort McMurray Wildfire from 2016-05-01 to 2016-06-25.
This dataset is published at http://hdl.handle.net/10864/12033
]]>https://catalog.docnow.io/datasets/20161223-ymmfire-tweets/https://catalog.docnow.io/datasets/20161223-ymmfire-tweets/This data set identifies 38M tweets collected for the analysis of social media messages related to the 2012 U.S. Presidential election. The data set provides tweet IDs for tweets containing the words "obama", "romney", or both (case-insensitive matching) during the period from July 1, 2012 through November 7, 2012. The paper, “Online and Social Media Data As an Imperfect Continuous Panel Survey.” PLoS ONE 11(1): e0145406 by Diaz et al. provides further description of the dataset.
This dataset is published at https://www.microsoft.com/en-us/download/details.aspx?id=52598
]]>https://catalog.docnow.io/datasets/20161223-election-2012-tweet-id-dataset/https://catalog.docnow.io/datasets/20161223-election-2012-tweet-id-dataset/This dataset contains the tweet ids of approximately 280 million tweets related to the 2016 United States presidential election. They were collected between July 13, 2016 and November 10, 2016 from the Twitter API using Social Feed Manager. These tweet ids are broken up into 12 collections. Each collection was collected either from the GET statuses/user_timeline method of the Twitter REST API or the POST statuses/filter method of the Twitter Stream API.
This dataset is published at http://hdl.handle.net/10.7910/DVN/PDI7IN
]]>https://catalog.docnow.io/datasets/20161223-2016-united-states-presidential-election-tweet-ids/https://catalog.docnow.io/datasets/20161223-2016-united-states-presidential-election-tweet-ids/Tweet ids for #JeSuisCharlie, #JeSuisAhmed, #JeSuisJuif, #CharlieHebdo tweets.
This dataset is published at http://hdl.handle.net/10864/10830
]]>https://catalog.docnow.io/datasets/20161223-jesuischarlie-jesuisahmed-jesuisjuif-charliehebdo-tweets/https://catalog.docnow.io/datasets/20161223-jesuischarlie-jesuisahmed-jesuisjuif-charliehebdo-tweets/Tweet IDs for tweets carrying the #cdnpoli hashtag, applied to Canadian politics, collected as part of a larger project centered on Canada's 42nd federal election.
This dataset is published at http://hdl.handle.net/10864/11348
]]>https://catalog.docnow.io/datasets/20161223-cdnpoli-tweets-canadian-politics-2015/https://catalog.docnow.io/datasets/20161223-cdnpoli-tweets-canadian-politics-2015/
\ No newline at end of file
+The first set of tweets were collected by Ed Summers at the University of Maryland and the second was a collaboration between Molly Loyd, Gregory Coleman, Kimberly Lamke, Benjamin Sugar and Ed Summers.
This dataset is published at https://archive.org/details/ferguson-tweet-ids
]]>https://catalog.docnow.io/datasets/20161224-ferguson-tweets/https://catalog.docnow.io/datasets/20161224-ferguson-tweets/Tweet ids for #elxn42 tweets.
This dataset is published at http://hdl.handle.net/10864/11270
]]>https://catalog.docnow.io/datasets/20161224-elxn42-tweets-42nd-canadian-federal-election/https://catalog.docnow.io/datasets/20161224-elxn42-tweets-42nd-canadian-federal-election/This dataset contains Twitter JSON data for several Twitter search queries that were collected around the #YesAllWomen Twitter "conversation" between May 25, 2014 and June 8, 2014 using the twarc (https://github.com/edsu/twarc) package that makes use of Twitter's search API. A total of 2,805,763 Tweets and 34,532 images make up the combined dataset.
This dataset is published at http://digital.library.unt.edu/ark:/67531/metadc304853/
]]>https://catalog.docnow.io/datasets/20161224-yes-all-women-twitter-dataset/https://catalog.docnow.io/datasets/20161224-yes-all-women-twitter-dataset/Tweet ids for #MakeDonaldDrumpfAgain tweets.
This dataset is published at http://hdl.handle.net/10864/11491
]]>https://catalog.docnow.io/datasets/20161223-makedonalddrumpfagain-tweets/https://catalog.docnow.io/datasets/20161223-makedonalddrumpfagain-tweets/Tweet ids for #NDP2016 tweets during the 2016 NDP Convention.
This dataset is published at http://hdl.handle.net/10864/11674
]]>https://catalog.docnow.io/datasets/20161223-ndp2016-tweets/https://catalog.docnow.io/datasets/20161223-ndp2016-tweets/Tweet ids for #panamapapers tweets.
This dataset is published at http://hdl.handle.net/10864/11592
]]>https://catalog.docnow.io/datasets/20161223-panamapapers-tweets/https://catalog.docnow.io/datasets/20161223-panamapapers-tweets/Tweet ids for #paris #Bataclan #parisattacks #porteouverte tweets.
This dataset is published at http://hdl.handle.net/10864/11312
]]>https://catalog.docnow.io/datasets/20161223-paris-bataclan-parisattacks-porteouverte-tweets/https://catalog.docnow.io/datasets/20161223-paris-bataclan-parisattacks-porteouverte-tweets/Tweet ids for #thechalkening tweets.
This dataset is published at http://hdl.handle.net/10864/11591
]]>https://catalog.docnow.io/datasets/20161223-thechalkening-tweets/https://catalog.docnow.io/datasets/20161223-thechalkening-tweets/Tweet ids for #YMMfire tweets captured during the 2016 Fort McMurray Wildfire from 2016-05-01 to 2016-06-25.
This dataset is published at http://hdl.handle.net/10864/12033
]]>https://catalog.docnow.io/datasets/20161223-ymmfire-tweets/https://catalog.docnow.io/datasets/20161223-ymmfire-tweets/This dataset contains the tweet ids of approximately 280 million tweets related to the 2016 United States presidential election. They were collected between July 13, 2016 and November 10, 2016 from the Twitter API using Social Feed Manager. These tweet ids are broken up into 12 collections. Each collection was collected either from the GET statuses/user_timeline method of the Twitter REST API or the POST statuses/filter method of the Twitter Stream API.
This dataset is published at http://hdl.handle.net/10.7910/DVN/PDI7IN
]]>https://catalog.docnow.io/datasets/20161223-2016-united-states-presidential-election-tweet-ids/https://catalog.docnow.io/datasets/20161223-2016-united-states-presidential-election-tweet-ids/This data set identifies 38M tweets collected for the analysis of social media messages related to the 2012 U.S. Presidential election. The data set provides tweet IDs for tweets containing the words "obama", "romney", or both (case-insensitive matching) during the period from July 1, 2012 through November 7, 2012. The paper, “Online and Social Media Data As an Imperfect Continuous Panel Survey.” PLoS ONE 11(1): e0145406 by Diaz et al. provides further description of the dataset.
This dataset is published at https://www.microsoft.com/en-us/download/details.aspx?id=52598
]]>https://catalog.docnow.io/datasets/20161223-election-2012-tweet-id-dataset/https://catalog.docnow.io/datasets/20161223-election-2012-tweet-id-dataset/Tweet ids for #JeSuisCharlie, #JeSuisAhmed, #JeSuisJuif, #CharlieHebdo tweets.
This dataset is published at http://hdl.handle.net/10864/10830
]]>https://catalog.docnow.io/datasets/20161223-jesuischarlie-jesuisahmed-jesuisjuif-charliehebdo-tweets/https://catalog.docnow.io/datasets/20161223-jesuischarlie-jesuisahmed-jesuisjuif-charliehebdo-tweets/Tweet IDs for tweets carrying the #cdnpoli hashtag, applied to Canadian politics, collected as part of a larger project centered on Canada's 42nd federal election.
This dataset is published at http://hdl.handle.net/10864/11348
]]>https://catalog.docnow.io/datasets/20161223-cdnpoli-tweets-canadian-politics-2015/https://catalog.docnow.io/datasets/20161223-cdnpoli-tweets-canadian-politics-2015/
\ No newline at end of file