% references.bib -- bibliography database (BibTeX entries follow).
% Sobel and Feldman (1973): 3x3 isotropic gradient operator.
% NOTE(review): the journal value looks like a book title; confirm the entry type.
@article{sobel-73,
  author  = {Sobel, Irwin and Feldman, Gary},
  title   = {A {$3\times 3$} isotropic gradient operator for image processing},
  journal = {Pattern Classification and Scene Analysis},
  year    = {1973},
  month   = jan,
  pages   = {271--272},
}
% Brown et al. (2020): few-shot learning with large language models.
% Title and venue are stored in title case; styles downcase as needed.
@article{brown2020language,
  author  = {Brown, Tom and Mann, Benjamin and Ryder, Nick and Subbiah, Melanie and Kaplan, Jared D. and Dhariwal, Prafulla and Neelakantan, Arvind and Shyam, Pranav and Sastry, Girish and Askell, Amanda and others},
  title   = {Language Models are Few-Shot Learners},
  journal = {Advances in Neural Information Processing Systems},
  volume  = {33},
  pages   = {1877--1901},
  year    = {2020},
}
% Pennington, Socher and Manning (2014): GloVe word vectors.
% Braces protect the GloVe and EMNLP spellings from style recasing.
@inproceedings{pennington2014glove,
  author    = {Pennington, Jeffrey and Socher, Richard and Manning, Christopher D.},
  title     = {{GloVe}: Global Vectors for Word Representation},
  booktitle = {Proceedings of the 2014 Conference on Empirical Methods in Natural Language Processing ({EMNLP})},
  pages     = {1532--1543},
  year      = {2014},
}
% Galiana and Suarez Castillo (2022): fuzzy matching of product labels (GoodIT '22).
% address holds the publisher city; the conference place is kept in venue so it
% does not collide with address under biblatex, where location is an alias of address.
@inproceedings{galiana2022,
  author    = {Galiana, Lino and Suarez Castillo, Milena},
  title     = {Fuzzy Matching on Big-Data: An Illustration with Scanner and Crowd-Sourced Nutritional Datasets},
  booktitle = {Proceedings of the 2022 ACM Conference on Information Technology for Social Good},
  series    = {GoodIT '22},
  venue     = {Limassol, Cyprus},
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  year      = {2022},
  pages     = {331--337},
  numpages  = {7},
  isbn      = {9781450392846},
  doi       = {10.1145/3524458.3547244},
  url       = {https://doi.org/10.1145/3524458.3547244},
  keywords  = {ElasticSearch, Fuzzy matching, Siamese neural networks, Natural language processing, Word embeddings},
  abstract  = {Food retailers’ scanner data provide unprecedented details on local consumption, provided that product identifiers allow a linkage with features of interest, such as nutritional information. In this paper, we enrich a large retailer dataset with nutritional information extracted from crowd-sourced and administrative nutritional datasets. To compensate for imperfect matching through the barcode, we develop a methodology to efficiently match short textual descriptions. After a preprocessing step to normalize short labels, we resort to fuzzy matching based on several tokenizers (including n-grams) by querying an ElasticSearch customized index and validate candidates echos as matches with a Levensthein edit-distance and an embedding-based similarity measure created from a siamese neural network model. The pipeline is composed of several steps successively relaxing constraints to find relevant matching candidates.},
}
% Luiten, Hox and de Leeuw (2020): survey nonresponse trends.
% The doi field holds the bare DOI; styles add the resolver prefix themselves.
@article{LuitenHoxde,
  author  = {Luiten, Annemieke and Hox, Joop and de Leeuw, Edith},
  title   = {Survey Nonresponse Trends and Fieldwork Effort in the 21st Century: Results of an International Study across Countries and Surveys},
  journal = {Journal of Official Statistics},
  volume  = {36},
  number  = {3},
  pages   = {469--487},
  year    = {2020},
  doi     = {10.2478/jos-2020-0025},
  url     = {https://doi.org/10.2478/jos-2020-0025},
}
% LeCun et al. (1989): backpropagation for handwritten zip code recognition.
@article{lecun-89,
  author  = {LeCun, Y. and Boser, B. and Denker, J. S. and Henderson, D. and Howard, R. E. and Hubbard, W. and Jackel, L. D.},
  title   = {Backpropagation Applied to Handwritten Zip Code Recognition},
  journal = {Neural Computation},
  volume  = {1},
  number  = {4},
  pages   = {541--551},
  year    = {1989},
  doi     = {10.1162/neco.1989.1.4.541},
}
% Voulodimos et al. (2018): review of deep learning for computer vision.
@article{voulodimos-18,
  author    = {Voulodimos, Athanasios and Doulamis, Nikolaos and Doulamis, Anastasios and Protopapadakis, Eftychios},
  title     = {Deep Learning for Computer Vision: A Brief Review},
  journal   = {Computational Intelligence and Neuroscience},
  publisher = {Hindawi Limited},
  volume    = {2018},
  pages     = {1--13},
  year      = {2018},
  doi       = {10.1155/2018/7068349},
  url       = {https://doi.org/10.1155/2018/7068349}
}
% Chen et al. (2017): atrous convolution for semantic segmentation (arXiv preprint, dblp export).
@article{chen-17,
  author     = {Chen, Liang{-}Chieh and Papandreou, George and Schroff, Florian and Adam, Hartwig},
  title      = {Rethinking Atrous Convolution for Semantic Image Segmentation},
  journal    = {CoRR},
  volume     = {abs/1706.05587},
  year       = {2017},
  eprinttype = {arXiv},
  eprint     = {1706.05587},
  url        = {http://arxiv.org/abs/1706.05587},
  timestamp  = {Mon, 13 Aug 2018 16:48:07 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/ChenPSA17.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Steele et al. (2017): poverty mapping from mobile phone and satellite data.
% The pages field carries the article number.
@article{steele-17,
  author    = {Steele, Jessica E. and Sunds{\o}y, P{\aa}l Roe and Pezzulo, Carla and Alegana, Victor A. and Bird, Tomas J. and Blumenstock, Joshua and Bjelland, Johannes and Eng{\o}-Monsen, Kenth and de Montjoye, Yves-Alexandre and Iqbal, Asif M. and Hadiuzzaman, Khandakar N. and Lu, Xin and Wetter, Erik and Tatem, Andrew J. and Bengtsson, Linus},
  title     = {Mapping poverty using mobile phone and satellite data},
  journal   = {Journal of The Royal Society Interface},
  publisher = {The Royal Society},
  volume    = {14},
  number    = {127},
  pages     = {20160690},
  year      = {2017},
  month     = feb,
  doi       = {10.1098/rsif.2016.0690},
  url       = {https://doi.org/10.1098/rsif.2016.0690}
}
% Plan d'applications satellitaires 2018 (institutional report).
% The corporate author is double-braced so it is not split into name parts.
% NOTE(review): a book entry normally needs a publisher; none is recorded here -- confirm.
@book{plan-sat,
  author = {{Commissariat g{\'e}n{\'e}ral au d{\'e}veloppement durable -- Direction de la recherche et de l'innovation}},
  title  = {Plan d'applications satellitaires 2018 - Des solutions spatiales pour conna{\^i}tre le territoire},
  year   = {2018},
  month  = jul,
}
% Ronneberger, Fischer and Brox (2015): U-Net (arXiv preprint, dblp export).
@article{ronneberger-15,
  author     = {Ronneberger, Olaf and Fischer, Philipp and Brox, Thomas},
  title      = {U-Net: Convolutional Networks for Biomedical Image Segmentation},
  journal    = {CoRR},
  volume     = {abs/1505.04597},
  year       = {2015},
  eprinttype = {arXiv},
  eprint     = {1505.04597},
  url        = {http://arxiv.org/abs/1505.04597},
  timestamp  = {Mon, 13 Aug 2018 16:46:52 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/RonnebergerFB15.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Paliwal et al. (2020): TableNet (arXiv preprint, dblp export).
@article{paliwal-20,
  author     = {Paliwal, Shubham and D, Vishwanath and Rahul, Rohit and Sharma, Monika and Vig, Lovekesh},
  title      = {TableNet: Deep Learning model for end-to-end Table detection and Tabular data extraction from Scanned Document Images},
  journal    = {CoRR},
  volume     = {abs/2001.01469},
  year       = {2020},
  eprinttype = {arXiv},
  eprint     = {2001.01469},
  url        = {http://arxiv.org/abs/2001.01469},
  timestamp  = {Thu, 14 Oct 2021 09:16:25 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2001-01469.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Beck et al. (2022), Courrier des statistiques: multimode household surveys.
% Accents use LaTeX escapes, matching the other escaped entries in this file.
% NOTE(review): volume/number/pages are not recorded -- confirm and complete.
@article{beck2022,
  author  = {Beck, Fran{\c{c}}ois and Castell, Laura and Legleye, St{\'e}phane and Schreiber, Amandine},
  title   = {Le multimode dans les enqu{\^e}tes aupr{\`e}s des m{\'e}nages : une collecte modernis{\'e}e, un processus complexifi{\'e}},
  journal = {Courrier des statistiques},
  year    = {2022},
}
% Riviere (2018), Courrier des statistiques: administrative declarations for statistics.
% The accented surname is written with a LaTeX escape so classic BibTeX sorts it correctly.
% NOTE(review): volume/number/pages are not recorded -- confirm and complete.
@article{riviere2018,
  author  = {Rivi{\`e}re, Pascal},
  title   = {Utiliser les d{\'e}clarations administratives {\`a} des fins statistiques},
  journal = {Courrier des statistiques},
  year    = {2018},
}
% Desrosieres: La politique des grands nombres.
% NOTE(review): the citation key says 2016 but the year field is 2010 -- confirm which edition is meant.
@book{desrosieres2016politique,
  author    = {Desrosi{\`e}res, Alain},
  title     = {La politique des grands nombres: histoire de la raison statistique},
  publisher = {La D{\'e}couverte},
  year      = {2010},
}
% Desrosieres (2004), Courrier des statistiques: surveys versus administrative registers.
% The title now uses LaTeX escapes like the author field of the same entry.
@article{desrosieres2004,
  author  = {Desrosi{\`e}res, Alain},
  title   = {Enqu{\^e}tes versus registres administratifs: r{\'e}flexions sur la dualit{\'e} des sources statistiques},
  journal = {Courrier des statistiques},
  year    = {2004},
}
% Isnard (2018), Courrier des statistiques: what is meant by official statistics.
% Title verb corrected to third person ("entend-on") and the closing question mark restored.
@article{isnard2018,
  author  = {Isnard, Michel},
  title   = {Qu'entend-on par statistique(s) publique(s) ?},
  journal = {Courrier des statistiques},
  year    = {2018},
}
% Humbert-Bottin (2018), Courrier des statistiques: the declaration sociale nominative (DSN).
% Accents use LaTeX escapes, matching the other escaped entries in this file.
@article{Humbert2018,
  author  = {Humbert-Bottin, {\'E}lisabeth},
  title   = {La d{\'e}claration sociale nominative: nouvelle r{\'e}f{\'e}rence pour les {\'e}changes de donn{\'e}es sociales des entreprises vers les administrations},
  journal = {Courrier des statistiques},
  year    = {2018},
}
% Renne (2018), Courrier des statistiques: understanding the declaration sociale nominative.
% Accents use LaTeX escapes, matching the other escaped entries in this file.
@article{Renne2018,
  author  = {Renne, Catherine},
  title   = {Bien comprendre la d{\'e}claration sociale nominative pour mieux mesurer},
  journal = {Courrier des statistiques},
  year    = {2018},
}
% Galiana et al. (2020), Insee Analyses: population movements after lockdown easing.
% An article entry requires a journal; the series name is stored there instead of in publisher.
% NOTE(review): issue number is not recorded -- confirm and add.
@article{galiana-20,
  author  = {Galiana, Lino and Suarez-Castillo, Milena and S{\'e}m{\'e}curbe, Fran{\c{c}}ois and Coudin, {\'E}lise and de Bellefon, Marie-Pierre},
  title   = {Retour partiel des mouvements de population avec le d{\'e}confinement},
  journal = {Insee Analyses},
  year    = {2020},
}
% Galiana et al. (2020): segregation analysis from mobile phone data (Insee working paper).
% Stored as a techreport: institution plus type replace the former publisher string.
% NOTE(review): working paper number is not recorded -- confirm and add.
@techreport{galiana2020segregation,
  author      = {Galiana, Lino and S{\'e}m{\'e}curbe, Fran{\c{c}}ois and Sakarovitch, Benjamin and Smoreda, Zbigniew},
  title       = {Residential segregation, daytime segregation and spatial frictions: an analysis from mobile phone data},
  institution = {Insee},
  type        = {Working Paper},
  year        = {2020},
}
% Ricciato et al. (2020): framework for estimating present population from mobile network data.
% The pages field carries the article number.
@article{ricciato-20,
  author   = {Ricciato, Fabio and Lanzieri, Giampaolo and Wirthmann, Albrecht and Seynaeve, Gerdy},
  title    = {Towards a methodological framework for estimating present population density from mobile network operator data},
  journal  = {Pervasive and Mobile Computing},
  volume   = {68},
  pages    = {101263},
  year     = {2020},
  issn     = {1574-1192},
  doi      = {10.1016/j.pmcj.2020.101263},
  url      = {https://www.sciencedirect.com/science/article/pii/S1574119220301097},
  keywords = {Mobile network operator data, Signalling data, Present population, Spatial density estimation, Experimental statistics},
  abstract = {The concept of present population is gaining increasing attention in official statistics. One possible approach to measure present population exploits data collected by Mobile Network Operators (MNO), from simple Call Detail Records (CDR) to more informative and complex signalling records. Such data, collected primarily for network operation processes, can be repurposed to infer patterns of human mobility. Two decades of research literature have produced several case studies, mostly focused on to CDR data, and a variety of ad-hoc methodologies tailored to specific datasets. Moving beyond the stage of explorative research, the regular production of official statistics across different MNO requires a more systematic approach to methodological development. Towards this aim, Eurostat and other members of the European Statistical System are working towards the definition of a general Reference Methodological Framework for processing MNO data for official statistics. In this contribution we report on the methodological aspects related to the estimation of present population density, for which we present a general and modular methodological structure that generalises previous proposals found in the academic literature. Along the way, we define a number of specific research problems requiring further attention by the research community. We stress the importance of comparing different methodological options at various points in the data workflow, e.g. in the geolocation of individual observations and in the inference method. Finally, we present illustrative results from a case-study based on real signalling data from a European operational network, complemented by numerical results from a simple simulation scenario.}
}
% Cybergeo item about the "Manuel d'analyse spatiale" (Insee Methodes 131).
% NOTE(review): the citation key suggests a different first author than the author field,
% and the title embeds series/page-count details -- verify against the DOI record.
@article{feuillet-18,
  author    = {Loonis, Vincent and de Bellefon, Marie-Pierre},
  title     = {Manuel d'analyse spatiale. Th{\'{e}}orie et mise en {\oe}uvre pratique avec R, Insee~M{\'{e}}thodes n{\textdegree}~131, Insee, Eurostat, 392~p.},
  journal   = {Cybergeo},
  publisher = {{OpenEdition}},
  year      = {2018},
  month     = dec,
  doi       = {10.4000/cybergeo.29853},
  url       = {https://doi.org/10.4000/cybergeo.29853}
}
% Andre and Meslin (2021): household property ownership and the property tax (Insee working paper).
% Stored as a techreport: institution plus type replace the former publisher string.
% NOTE(review): working paper number is not recorded -- confirm and add.
@techreport{andre-21,
  author      = {Andr{\'e}, Mathias and Meslin, Olivier},
  title       = {Et pour quelques appartements de plus : {\'E}tude de la propri{\'e}t{\'e} immobili{\`e}re des m{\'e}nages et du profil redistributif de la taxe fonci{\`e}re},
  institution = {Insee},
  type        = {Documents de travail},
  year        = {2021},
}
% Cerdeiro et al. (2020): AIS-based nowcasts of world seaborne trade (IMF working paper).
% The honorific was removed from the first author so it is not treated as a given name;
% only the acronym is brace-protected in the title.
@techreport{cerdeiro-20,
  author      = {Cerdeiro, Diego A. and Komaromi, Andras and Liu, Yang and Saeed, Mamoon},
  title       = {World Seaborne Trade in Real Time: A Proof of Concept for Building {AIS}-based Nowcasts from Scratch},
  institution = {International Monetary Fund},
  type        = {IMF Working Papers},
  number      = {2020/057},
  year        = {2020},
  month       = may,
  url         = {https://ideas.repec.org/p/imf/imfwpa/2020-057.html},
  abstract    = {Maritime data from the Automatic Identification System (AIS) have emerged as a potential source for real time information on trade activity. However, no globally applicable end-to-end solution has been published to transform raw AIS messages into economically meaningful, policy-relevant indicators of international trade. Our paper proposes and tests a set of algorithms to fill this gap. We build indicators of world seaborne trade using raw data from the radio signals that the global vessel fleet emits for navigational safety purposes. We leverage different machine-learning techniques to identify port boundaries, construct port-to-port voyages, and estimate trade volumes at the world, bilateral and within-country levels. Our methodology achieves a good fit with official trade statistics for many countries and for the world in aggregate. We also show the usefulness of our approach for sectoral analyses of crude oil trade, and for event studies such as Hurricane Maria and the effect of measures taken to contain the spread of the novel coronavirus. Going forward, ongoing refinements of our algorithms, additional data on vessel characteristics, and country-specific knowledge should help improve the performance of our general approach for several country cases.},
}
% International Maritime Organization (2020): Fourth Greenhouse Gas Study.
% Uses the misc type (the previous type name is not a BibTeX entry type); the corporate
% author is double-braced so it is kept as one name.
@misc{imo-20,
  author = {{International Maritime Organization}},
  title  = {{IMO} Fourth Greenhouse Gas Study 2020},
  year   = {2020},
  month  = jun,
}
% van den Brakel et al. (2017), Survey Methodology: social media and the Dutch consumer confidence index.
% Surname particle restored on the first author; initials are spaced.
% NOTE(review): issue number and page range are not recorded -- confirm and add.
@article{brakel-17,
  author  = {van den Brakel, Jan and S{\"o}hler, Emily and Daas, P. J. H. and Buelens, Bart},
  title   = {Social media as a data source for official statistics; the {Dutch} {Consumer Confidence Index}},
  journal = {Survey Methodology},
  volume  = {43},
  year    = {2017},
  month   = dec,
}
% Olteanu et al. (2019), Frontiers in Big Data: biases and pitfalls of social data.
% Publisher XML markup was stripped from the abstract; the dotless-i letters in the
% last author's surname use LaTeX escapes.
@article{olteanu-19,
  author   = {Olteanu, Alexandra and Castillo, Carlos and Diaz, Fernando and K{\i}c{\i}man, Emre},
  title    = {Social Data: Biases, Methodological Pitfalls, and Ethical Boundaries},
  journal  = {Frontiers in Big Data},
  volume   = {2},
  year     = {2019},
  issn     = {2624-909X},
  doi      = {10.3389/fdata.2019.00013},
  url      = {https://www.frontiersin.org/articles/10.3389/fdata.2019.00013},
  abstract = {Social data in digital form—including user-generated content, expressed or implicit relations between people, and behavioral traces—are at the core of popular applications and platforms, driving the research agenda of many researchers. The promises of social data are many, including understanding “what the world thinks” about a social issue, brand, celebrity, or other entity, as well as enabling better decision-making in a variety of fields including public policy, healthcare, and economics. Many academics and practitioners have warned against the naïve usage of social data. There are biases and inaccuracies occurring at the source of the data, but also introduced during processing. There are methodological limitations and pitfalls, as well as ethical boundaries and unexpected consequences that are often overlooked. This paper recognizes the rigor with which these issues are addressed by different researchers varies across a wide range. We identify a variety of menaces in the practices around social data use, and organize them in a framework that helps to identify them. “For your own sanity, you have to remember that not all problems can be solved. Not all problems can be solved, but all problems can be illuminated.” –Ursula Franklin},
}
% Schumacher (2016), International Journal of Forecasting: MIDAS versus bridge equations.
@article{schumacher-16,
  author   = {Schumacher, Christian},
  title    = {A comparison of {MIDAS} and bridge equations},
  journal  = {International Journal of Forecasting},
  volume   = {32},
  number   = {2},
  pages    = {257--270},
  year     = {2016},
  url      = {https://EconPapers.repec.org/RePEc:eee:intfor:v:32:y:2016:i:2:p:257-270},
  keywords = {Mixed-data sampling; Bridge equations; Nowcasting},
  abstract = {This paper compares two single-equation approaches from the recent nowcasting literature: mixed-data sampling (MIDAS) regressions and bridge equations. Both approaches are suitable for nowcasting low-frequency variables such as the quarterly GDP using higher-frequency business cycle indicators. Three differences between the approaches are identified: (1) MIDAS is a direct multi-step nowcasting tool, whereas bridge equations provide iterated forecasts; (2) the weighting of high-frequency predictor observations in MIDAS is based on functional lag polynomials, whereas the bridge equation weights are fixed partly by time aggregation; (3) for parameter estimation, the MIDAS equations consider current-quarter leads of high-frequency indicators, whereas bridge equations typically do not. To assist in discussing the differences between the approaches in isolation, intermediate specifications between MIDAS and bridge equations are provided. The alternative models are compared in an empirical application to nowcasting GDP growth in the Euro area, given a large set of business cycle indicators.},
}
% Stock and Watson: "Dynamic Factor Models", chapter in the Oxford Handbook of Economic Forecasting.
% A chapter with its own authors inside an edited volume is an incollection entry.
% Second editor's surname corrected to Hendry.
% NOTE(review): confirm the publication year of the handbook edition cited.
@incollection{stock-10,
  author    = {Stock, James and Watson, Mark},
  title     = {Dynamic Factor Models},
  booktitle = {Oxford Handbook of Economic Forecasting},
  editor    = {Clements, Michael P. and Hendry, David F.},
  publisher = {Oxford University Press},
  address   = {Oxford},
  year      = {2010},
  url       = {http://www.economics.harvard.edu/faculty/stock/files/dfm_oup_4.pdf},
}
% Hopp (2021): economic nowcasting with LSTM networks (arXiv preprint).
% The arXiv identifier is stored in the eprint fields; duplicated keywords were removed.
@misc{hopp-21,
  author      = {Hopp, Daniel},
  title       = {Economic Nowcasting with {Long Short-Term Memory} Artificial Neural Networks ({LSTM})},
  publisher   = {arXiv},
  year        = {2021},
  eprinttype  = {arXiv},
  eprint      = {2106.08901},
  eprintclass = {econ.EM},
  doi         = {10.48550/ARXIV.2106.08901},
  url         = {https://arxiv.org/abs/2106.08901},
  keywords    = {Econometrics (econ.EM), Machine Learning (cs.LG), FOS: Economics and business, FOS: Computer and information sciences},
  copyright   = {Creative Commons Attribution 4.0 International},
}
% Richardson (2018): critical review of nowcasting with big data.
% The journal name uses the same spelling as the other entry from this thematic issue.
% NOTE(review): volume/number are recorded as 505/1 here but as number 505-506 in that
% other entry -- confirm and align.
@article{richardson-18,
  author  = {Richardson, Pete},
  title   = {Nowcasting and the Use of Big Data in Short Term Macroeconomic Forecasting: A Critical Review},
  journal = {Economie et Statistique / Economics and Statistics},
  volume  = {505},
  number  = {1},
  pages   = {65--87},
  year    = {2018},
  doi     = {10.24187/ecostat.2018.505d.1966},
  url     = {https://www.persee.fr/doc/estat_0336-1454_2018_num_505_1_10867},
  note    = {Included in a thematic issue: Big Data and Statistics (Part 1)},
}
% Bortoli, Combes and Renault (2018): nowcasting GDP growth from newspaper text.
% Only the acronym is brace-protected in the title.
@article{bortoli-18,
  author      = {Bortoli, Cl{\'e}ment and Combes, St{\'e}phanie and Renault, Thomas},
  title       = {Nowcasting {GDP} Growth by Reading Newspapers},
  journal     = {Economie et Statistique / Economics and Statistics},
  publisher   = {{INSEE}},
  series      = {Big Data and Statistics - Part 1},
  number      = {505-506},
  pages       = {17--33},
  year        = {2018},
  doi         = {10.24187/ecostat.2018.505d.1964},
  url         = {https://hal.archives-ouvertes.fr/hal-03205161},
  keywords    = {economic analysis ; nowcasting ; GDP ; media ; Big Data ; sentiment analysis ; machine learning ; natural language analysis},
  hal_id      = {hal-03205161},
  hal_version = {v1},
}
% Fornaro (2020): nowcasting Finnish industrial production (ETLA working paper).
% Title spelling corrected ("Unconventional").
@techreport{fornaro-20,
  author      = {Fornaro, Paolo},
  title       = {Nowcasting Industrial Production Using Unconventional Data Sources},
  institution = {The Research Institute of the Finnish Economy},
  type        = {ETLA Working Papers},
  number      = {80},
  year        = {2020},
  month       = jun,
  url         = {https://ideas.repec.org/p/rif/wpaper/80.html},
  keywords    = {Flash Estimates; Machine Learning; Big Data; Nowcasting},
  abstract    = {In this work, we rely on unconventional data sources to nowcast the year-on-year growth rate of Finnish industrial production, for different industries. As predictors, we use real-time truck traffic volumes measured automatically in different geographical locations around Finland, as well as electricity consumption data. In addition to standard time-series models, we look into the adoption of machine learning techniques to compute the predictions. We find that the use of non-typical data sources such as the volume of truck traffic is beneficial, in terms of predictive power, giving us substantial gains in nowcasting performance compared to an autoregressive model. Moreover, we find that the adoption of machine learning techniques improves substantially the accuracy of our predictions in comparison to standard linear models. While the average nowcasting errors we obtain are higher compared to the current revision errors of the official statistical institute, our nowcasts provide clear signals of the overall trend of the series and of sudden changes in growth.},
}
% Salgado and Oancea (2020): new data sources for official statistics (arXiv preprint).
% The arXiv identifier is stored in the eprint fields; the duplicated keyword was removed.
@misc{salgado-20,
  author      = {Salgado, David and Oancea, Bogdan},
  title       = {On new data sources for the production of official statistics},
  publisher   = {arXiv},
  year        = {2020},
  eprinttype  = {arXiv},
  eprint      = {2003.06797},
  eprintclass = {stat.OT},
  doi         = {10.48550/ARXIV.2003.06797},
  url         = {https://arxiv.org/abs/2003.06797},
  keywords    = {Other Statistics (stat.OT), Applications (stat.AP), FOS: Computer and information sciences, J.1; J.4; H.4, 62P25, 62P20},
  copyright   = {Creative Commons Attribution 4.0 International},
}
% Hacking (1990): The Taming of Chance (Ideas in Context series).
% The publisher city is stored in address; the series name is kept once, in series.
@book{hacking-90,
  author    = {Hacking, Ian},
  title     = {The Taming of Chance},
  series    = {Ideas in Context},
  publisher = {Cambridge University Press},
  address   = {Cambridge},
  year      = {1990},
  doi       = {10.1017/CBO9780511819766},
}