-
Notifications
You must be signed in to change notification settings - Fork 13
/
Copy pathsources.bib
223 lines (204 loc) · 8.17 KB
/
sources.bib
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
% Hosseini et al. (EMNLP 2014): arithmetic word problems via verb categorization.
% The booktitle carries no leading "In"; bibliography styles insert that word themselves.
@inproceedings{hosseini14learningto,
  author    = {Hosseini, Mohammad Javad and Hajishirzi, Hannaneh and
               Etzioni, Oren and Kushman, Nate},
  title     = {Learning to solve arithmetic word problems with verb
               categorization},
  booktitle = {Proceedings of the 2014 Conference on Empirical Methods in
               Natural Language Processing ({EMNLP})},
  year      = {2014},
}
% Hendrycks et al. (2021), arXiv preprint 2103.03874.
% The dataset name MATH is brace-protected so sentence-casing styles keep it upper-case.
@article{hendrycks2021measuring,
  author  = {Hendrycks, Dan and Burns, Collin and Kadavath, Saurav and
             Arora, Akul and Basart, Steven and Tang, Eric and
             Song, Dawn and Steinhardt, Jacob},
  title   = {Measuring Mathematical Problem Solving With the {MATH}
             Dataset},
  journal = {arXiv preprint arXiv:2103.03874},
  year    = {2021},
}
% Hendrycks et al. (2021), arXiv preprint 2105.09938.
% The benchmark name APPS is brace-protected so sentence-casing styles keep it upper-case;
% the trailing "and others" token is rendered as "et al." by the style.
@article{hendrycks2021measuringcode,
  author  = {Hendrycks, Dan and Basart, Steven and Kadavath, Saurav and
             Mazeika, Mantas and Arora, Akul and Guo, Ethan and
             Burns, Collin and Puranik, Samir and He, Horace and
             Song, Dawn and others},
  title   = {Measuring Coding Challenge Competence With {APPS}},
  journal = {arXiv preprint arXiv:2105.09938},
  year    = {2021},
}
% Miao, Liang and Su (ACL 2020): corpus for English math word problem solvers.
% The proper noun English is brace-protected against sentence-casing styles.
@inproceedings{miao2020diverse,
  author    = {Miao, Shen-Yun and Liang, Chao-Chun and Su, Keh-Yih},
  title     = {A Diverse Corpus for Evaluating and Developing {English}
               Math Word Problem Solvers},
  booktitle = {Proceedings of the 58th Annual Meeting of the Association
               for Computational Linguistics},
  pages     = {975--984},
  year      = {2020},
}
% Yin et al. (MSR 2018): mining aligned code / natural-language pairs from Stack Overflow.
% The doi field holds the bare identifier; styles and hyperref add the resolver URL themselves.
@inproceedings{yin2018mining,
  author    = {Yin, Pengcheng and Deng, Bowen and Chen, Edgar and
               Vasilescu, Bogdan and Neubig, Graham},
  title     = {Learning to Mine Aligned Code and Natural Language Pairs
               from {Stack Overflow}},
  booktitle = {International Conference on Mining Software Repositories},
  series    = {MSR},
  pages     = {476--486},
  publisher = {ACM},
  year      = {2018},
  doi       = {10.1145/3196398.3196408},
}
% Saxton et al. (2019), arXiv preprint 1904.01557.
@article{saxton2019analysing,
  author  = {Saxton, David and Grefenstette, Edward and Hill, Felix and
             Kohli, Pushmeet},
  title   = {Analysing mathematical reasoning abilities of neural models},
  year    = {2019},
  journal = {arXiv preprint arXiv:1904.01557},
}
% Huang et al. (ACL 2016): large-scale math word problem dataset and evaluation.
% The word opening the second sentence of the title is capitalised and brace-protected,
% because sentence-casing styles only preserve capitals after a colon, not after "?".
@inproceedings{huang2016well,
  author    = {Huang, Danqing and Shi, Shuming and Lin, Chin-Yew and
               Yin, Jian and Ma, Wei-Ying},
  title     = {How well do computers solve math word problems?
               {Large}-scale dataset construction and evaluation},
  booktitle = {Proceedings of the 54th Annual Meeting of the Association
               for Computational Linguistics (Volume 1: Long Papers)},
  pages     = {887--896},
  year      = {2016},
}
% Upadhyay and Chang (2015): the DRAW algebra word problem set, a technical report.
% NOTE(review): institution and report number were restored from the published report
% citation (the previous value named an indexing service) -- confirm against the report.
@techreport{upadhyay2015draw,
  author      = {Upadhyay, Shyam and Chang, Ming-Wei},
  title       = {{DRAW}: A Challenging and Diverse Algebra Word Problem Set},
  institution = {Microsoft Research},
  number      = {MSR-TR-2015-78},
  year        = {2015},
}
% Cobbe et al. (2021), arXiv preprint 2110.14168.
@article{cobbe2021training,
  author  = {Cobbe, Karl and Kosaraju, Vineet and Bavarian, Mohammad and
             Hilton, Jacob and Nakano, Reiichiro and Hesse, Christopher and
             Schulman, John},
  title   = {Training verifiers to solve math word problems},
  year    = {2021},
  journal = {arXiv preprint arXiv:2110.14168},
}
% Amini et al. (2019), arXiv preprint 1905.13319.
% The dataset name MathQA is brace-protected so sentence-casing styles keep its inner capitals.
@article{amini2019mathqa,
  author  = {Amini, Aida and Gabriel, Saadia and Lin, Peter and
             Koncel-Kedziorski, Rik and Choi, Yejin and
             Hajishirzi, Hannaneh},
  title   = {{MathQA}: Towards Interpretable Math Word Problem Solving
             with Operation-Based Formalisms},
  journal = {arXiv preprint arXiv:1905.13319},
  year    = {2019},
}
% Austin et al. (2021), arXiv preprint 2108.07732; author list is truncated with "and others".
@article{austin2021program,
  author  = {Austin, Jacob and Odena, Augustus and Nye, Maxwell and
             Bosma, Maarten and Michalewski, Henryk and Dohan, David and
             Jiang, Ellen and Cai, Carrie and Terry, Michael and
             Le, Quoc and others},
  title   = {Program synthesis with large language models},
  year    = {2021},
  journal = {arXiv preprint arXiv:2108.07732},
}
% Zhou et al. (EMNLP-IJCNLP 2019): temporal commonsense understanding.
% Quotation marks are written as TeX ligatures instead of Unicode curly quotes so the
% entry stays safe for 8-bit BibTeX; "Going" is braced to keep its capital under
% sentence-casing styles.
@inproceedings{zhou2019going,
  author    = {Zhou, Ben and Khashabi, Daniel and Ning, Qiang and Roth, Dan},
  title     = {``{Going} on a vacation'' takes longer than ``{Going} for a
               walk'': A Study of Temporal Commonsense Understanding},
  booktitle = {Proceedings of the 2019 Conference on Empirical Methods in
               Natural Language Processing and the 9th International Joint
               Conference on Natural Language Processing (EMNLP-IJCNLP)},
  pages     = {3363--3369},
  year      = {2019},
}
% Roy and Roth (EMNLP 2015): solving general arithmetic word problems.
@inproceedings{roy2015solving,
  author    = {Roy, Subhro and Roth, Dan},
  title     = {Solving General Arithmetic Word Problems},
  year      = {2015},
  booktitle = {Proceedings of the 2015 Conference on Empirical Methods in
               Natural Language Processing},
  pages     = {1743--1752},
}
% Lin et al. (EMNLP 2020): probing numerical commonsense in pre-trained language models.
% The name NumerSense is brace-protected; it follows "?!" rather than a colon, so
% sentence-casing styles would otherwise lowercase it.
@inproceedings{lin2020birds,
  author    = {Lin, Bill Yuchen and Lee, Seyeon and Khanna, Rahul and
               Ren, Xiang},
  title     = {Birds have four legs?! {NumerSense}: Probing Numerical
               Commonsense Knowledge of Pre-Trained Language Models},
  booktitle = {Proceedings of the 2020 Conference on Empirical Methods in
               Natural Language Processing (EMNLP)},
  pages     = {6862--6868},
  year      = {2020},
}
% Mishra et al. (ACL 2022): a suite of mathematical reasoning tasks.
% The benchmark name NumGLUE is brace-protected so sentence-casing styles keep its capitals.
@inproceedings{mishra2022numglue,
  author    = {Mishra, Swaroop and Mitra, Arindam and Varshney, Neeraj and
               Sachdeva, Bhavdeep and Clark, Peter and Baral, Chitta and
               Kalyan, Ashwin},
  title     = {{NumGLUE}: A Suite of Fundamental yet Challenging
               Mathematical Reasoning Tasks},
  booktitle = {Proceedings of the 60th Annual Meeting of the Association
               for Computational Linguistics (Volume 1: Long Papers)},
  pages     = {3505--3523},
  year      = {2022},
}
% Kushman et al. (ACL 2014): automatically solving algebra word problems.
@inproceedings{kushman2014learning,
  author    = {Kushman, Nate and Artzi, Yoav and Zettlemoyer, Luke and
               Barzilay, Regina},
  title     = {Learning to automatically solve algebra word problems},
  year      = {2014},
  booktitle = {Proceedings of the 52nd Annual Meeting of the Association
               for Computational Linguistics (Volume 1: Long Papers)},
  pages     = {271--281},
}
% Roy, Vieira and Roth (TACL vol. 3, 2015): reasoning about quantities in text.
@article{roy2015reasoning,
  author    = {Roy, Subhro and Vieira, Tim and Roth, Dan},
  title     = {Reasoning about quantities in natural language},
  year      = {2015},
  journal   = {Transactions of the Association for Computational
               Linguistics},
  publisher = {MIT Press},
  volume    = {3},
  pages     = {1--13},
}
% Koncel-Kedziorski et al. (TACL vol. 3, 2015): parsing algebraic word problems into equations.
@article{koncel2015parsing,
  author    = {Koncel-Kedziorski, Rik and Hajishirzi, Hannaneh and
               Sabharwal, Ashish and Etzioni, Oren and Ang, Siena Dumas},
  title     = {Parsing algebraic word problems into equations},
  year      = {2015},
  journal   = {Transactions of the Association for Computational
               Linguistics},
  publisher = {MIT Press},
  volume    = {3},
  pages     = {585--597},
}
% Patel, Bhattamishra and Goyal (NAACL-HLT 2021): the paper introducing the SVAMP
% challenge set (see abstract). Field values are brace-delimited; month uses the
% predefined jun macro.
@inproceedings{patel_etal_2021_nlp,
  author    = {Patel, Arkil and Bhattamishra, Satwik and Goyal, Navin},
  title     = {Are {NLP} Models really able to Solve Simple Math Word
               Problems?},
  booktitle = {Proceedings of the 2021 Conference of the North American
               Chapter of the Association for Computational Linguistics:
               Human Language Technologies},
  publisher = {Association for Computational Linguistics},
  address   = {Online},
  month     = jun,
  year      = {2021},
  pages     = {2080--2094},
  doi       = {10.18653/v1/2021.naacl-main.168},
  url       = {https://aclanthology.org/2021.naacl-main.168},
  abstract  = {The problem of designing NLP solvers for math word
               problems (MWP) has seen sustained research activity and
               steady gains in the test accuracy. Since existing solvers
               achieve high performance on the benchmark datasets for
               elementary level MWPs containing one-unknown arithmetic
               word problems, such problems are often considered
               {``}solved{''} with the bulk of research attention moving
               to more complex MWPs. In this paper, we restrict our
               attention to English MWPs taught in grades four and lower.
               We provide strong evidence that the existing MWP solvers
               rely on shallow heuristics to achieve high performance on
               the benchmark datasets. To this end, we show that MWP
               solvers that do not have access to the question asked in
               the MWP can still solve a large fraction of MWPs.
               Similarly, models that treat MWPs as bag-of-words can also
               achieve surprisingly high accuracy. Further, we introduce a
               challenge dataset, SVAMP, created by applying carefully
               chosen variations over examples sampled from existing
               datasets. The best accuracy achieved by state-of-the-art
               models is substantially lower on SVAMP, thus showing that
               much remains to be done even for the simplest of the MWPs.},
}