-
Notifications
You must be signed in to change notification settings - Fork 2
/
solutions.py
367 lines (336 loc) · 16.4 KB
/
solutions.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
import config
import software
import pandas as pd
import files
import os
import markdown
import yaml
import tasks
import static_files
import log
df = None
# Load from disk a table of all the solutions in the database.
# Cache the result so that later runs are faster.
def all ():
global df
if df is None:
json = [ ]
for task_name in files.subfolders( config.tasks_folder ):
for solution_file in files.docs_inside( os.path.join( config.tasks_folder, task_name ) ):
if files.without_extension( solution_file ) == 'description':
continue
bits = files.without_extension( solution_file ).split( ', ' )
software_name = bits[0]
if len( bits ) == 1:
solution_name = 'solution'
else:
solution_name = ', '.join( bits[1:] )
input_file = os.path.join( config.tasks_folder, task_name, solution_file )
next = {
'task name' : task_name,
'software' : software_name,
'solution name' : solution_name,
'solution filename' : solution_file,
'solution path' : config.relativize_path( input_file ),
'solution title' :
f'{task_name} (in {files.without_extension( solution_file )})'
}
markdown_content = markdown.read_doc( input_file )
metadata, content = yaml.split_string( markdown_content )
next['content'] = content + files.modification_text( input_file )
for key, value in metadata.items():
next[key] = value
next['raw content'] = markdown_content
next['permalink'] = config.blogify( next['solution title'] )
json.append( next )
df = pd.DataFrame( json )
# Add links to solution pages to each solution row
def link_to_solution ( solution_row ):
return f'[{solution_row["solution name"]}](../{config.blogify(solution_row["solution title"])})'
df['markdown link'] = df.apply( link_to_solution, axis=1 )
software.check_consistency_with_solutions()
return df
# Clear cache so we're forced to re-examine what's on disk.
# Useful if you just added a solution.
def clear_cache ():
global df
df = None
# Get just those solutions for a specific software package and libraries
# (of for any subset of the given set of libraries, if the second param is True)
def all_for ( software_name, libraries='solution', allow_subsets=False ):
selection = [ ]
for index, row in all().iterrows():
if row['software'] != software_name:
selection.append( False )
elif row['solution name'] == libraries:
selection.append( True )
elif allow_subsets and software.library_subset( row['solution name'], libraries ):
selection.append( True )
else:
selection.append( False )
return all()[selection]
# Private data and function used in class defined below
github_url = 'https://github.com/nathancarter/how2data'
new_github_issue_url = f'{github_url}/issues/new/choose'
def edit_on_github_url ( filename ):
return f'{github_url}/tree/main/{config.relativize_path( filename )}'
# Function for internal use only, by a method of the Solution class.
# Creates transforms to pass to markdown.transform_code().
def _transform_software ( from_sw, to_sw ):
if to_sw in software.using_code:
line1 = f'### You should replace it with {to_sw} code instead.'
line2 = f'### After changing the code above, be sure you get the same'
line3 = f'### Then delete these comments surrounding your code.\n' + \
f'### And delete the old {from_sw} output (below) as well.'
else:
line1 = f'### You should replace this whole block with instructions\n' + \
f'### on how to accomplish the same goal using {to_sw} instead.'
line2 = f'### After replacing this code, be sure your method gets the same'
line3 = f'### When you\'re done, this code and its output should be gone.'
def result ( block_markdown ):
lines = block_markdown.split( '\n' )
return '\n'.join( [
f'```{to_sw}',
f'### The {from_sw} code below has been commented out.',
line1,
f'###',
'',
*[ f'# {line}' for line in lines[1:-2] ],
'',
'###',
line2,
f'### results that the original {from_sw} code gave, which you can',
f'### see on the web at the link given at the top of this file.',
line3,
'```'
] ) + '\n'
return result
# Objects of the following class represent an individual row in the solutions df.
class Solution:
# The row may be an integer index, a solution title, or a row from the df.
# Consider each option below.
def __init__ ( self, row ):
if isinstance( row, pd.Series ):
self.index = row.name
elif isinstance( row, str ):
self.index = dict( zip( all()['solution title'], all().index ) )[row]
elif isinstance( row, int ):
self.index = row
else:
raise TypeError( f"Solutions cannot be constructed from a {type(row)}" )
self._row = all().iloc[self.index,:]
# Getters for all columns in the solutions df
@property
def task_name ( self ):
return self._row['task name']
@property
def software ( self ):
return self._row['software']
@property
def solution_name ( self ):
return self._row['solution name']
@property
def solution_filename ( self ):
return self._row['solution filename']
@property
def solution_path ( self ):
return self._row['solution path']
@property
def solution_title ( self ):
return self._row['solution title']
@property
def content ( self ):
return self._row['content']
@property
def author ( self ):
return self._row['author']
@property
def raw_content ( self ):
return self._row['raw content']
@property
def permalink ( self ):
return self._row['permalink']
@property
def markdown_link ( self ):
return self._row['markdown link']
# And a getter for the whole row
@property
def row ( self ):
return self._row
# And for its online link in the live website
@property
def url ( self ):
return config.site_link( self.permalink )
# And for its content, but without the last modified date
@property
def inner_content ( self ):
result = self.content.split( '\n' )
if result[-1].startswith( 'Content last modified' ):
result = result[:-1]
return '\n'.join( result )
# What file in the database defines this solution? The result is an absolute path.
# We assume we're working in the subfolder of the database folder defined in config.py,
# the specific subfolder containing the task for this solution.
# But you can choose a different containing folder for the solution file with the second argument.
def input_file ( self, folder=None ):
if folder is None:
folder = os.path.join( config.tasks_folder, self.task_name )
return os.path.join( folder, self.solution_filename )
# What file does this solution generate? The result is an absolute path.
# By default, the result will be in the jekyll input folder defined in config.py.
# But you can choose a different output folder with the second argument.
def output_file ( self, folder=None ):
if folder is None:
folder = config.jekyll_input_folder
return os.path.join( folder, self.permalink + '.md' )
# What file in the database describes this solution's task? The result is an absolute path.
# We assume we're working in the subfolder of the database folder defined in config.py,
# the specific subfolder containing the task for this solution.
# But you can choose a different containing folder for the solution file with the second argument.
def task_file ( self, folder=None ):
if folder is None:
folder = os.path.join( config.tasks_folder, self.task_name )
return markdown.get_unique_doc( os.path.join( folder, 'description' ) )
# Get the Task instance associated with this Solution.
def task ( self ):
return tasks.Task( tasks.all()[tasks.all()['task name'] == self.task_name].iloc[0] )
# Must this solution be rebuilt? Timestamps of relevant files are checked.
# Parameters customize what folders are passed to input_file(), output_file(), and task_file().
def must_build ( self, in_folder=None, out_folder=None ):
return files.must_rebuild( self.input_file( in_folder ), self.output_file( out_folder ) ) \
or files.must_rebuild( self.task_file( in_folder ), self.output_file( out_folder ) )
# Build the markdown content this solution generates but just return it as a string.
# Parameter 2 customizes what folder is passed to input_file().
# Parameter 3 can be used to provide a Task, so we don't have to look one up.
def build_text ( self, folder=None, task=None ):
if folder is None:
folder = os.path.join( config.tasks_folder, self.task_name )
input_file = self.input_file( folder )
content = markdown.adjust_image_filenames(
tasks.image_link_adjuster( self.task_name ),
tasks.make_links( self.content ) )
content += f'\n\nSee a problem? [Tell us]({new_github_issue_url}) or ' + \
f'[edit the source]({edit_on_github_url(input_file)}).'
if task is None:
task = self.task()
if type( self.author ) == str:
contributors = f'Contributed by {self.author}'
else:
try:
contributors = 'Contributed by:\n\n' + \
'\n'.join( [ f' * {author}' for author in self.author ] )
except TypeError:
contributors = ''
return static_files.fill_template( 'solution',
TITLE = self.solution_title,
PERMALINK = self.permalink,
TASK_PAGE_LINK = f'[See all solutions.](../{task.permalink})',
DESCRIPTION =
markdown.adjust_image_filenames(
tasks.image_link_adjuster( self.task_name ),
tasks.make_links( task.content ) ),
MARKDOWN_CONTENT = markdown.wrap_in_html_comments( markdown.run(
content, folder, self.software ) ),
CONTRIBUTORS = contributors
)
# Build the markdown content this solution generates and save it to disk.
# Parameters customize what folders are passed to input_file(), output_file(), and build_text().
def build_file ( self, in_folder=None, out_folder=None, task=None ):
markdown_content = self.build_text( in_folder, task )
output_file = self.output_file( out_folder )
markdown.write( output_file, markdown_content )
log.built( 'solution for', self.task_name,
Software=self.software,
Solution=self.solution_name )
files.mark_as_regenerated( self.permalink + '.md' )
return output_file
# Run build_file() if needed.
# Otherwise, log that it wasn't needed, but mark the file as up-to-date.
def build ( self, in_folder=None, out_folder=None, task=None, force=False ):
if force or self.must_build( in_folder, out_folder ):
self.build_file( in_folder, out_folder, task )
else:
log.not_built( self.output_file( out_folder ),
self.input_file( in_folder ),
self.task_file( in_folder ) )
files.mark_as_regenerated( self.permalink + '.md' )
# Read from disk the most recent generated markdown content for this solution.
# This is the entire file, not just the "body"; see the next function for getting the body only.
# Parameter customizes what folder is passed to output_file().
def generated_markdown ( self, folder=None ):
return files.read_text_file( self.output_file( folder ) )
# Same as previous function, but the solution body only (no description, footer, etc.).
def generated_body ( self, folder=None ):
return markdown.unwrap_from_html_comments( self.generated_markdown( folder ) )
# Save a copy of this Solution, but rewritten as an assignment for an expert user
# to do by rewriting this solution in the given piece of software. That is, we add
# an instructions header that explains how to rewrite the solution into the new
# software, and we store the solution with that new header in a file in the same
# folder as this sotluion, but in the given format. All code blocks in this
# solution are also edited so that they explain to the user how to rewrite them.
# The software parameter is a string, such as "Python, using sklearn" or "Julia".
# The format parameter is a string, as markdown.export() expects, such as
# "Rmd" or "ipynb" or "docx".
def make_assignment ( self, new_software, format ):
# Get markdown content of this solution
content = self.inner_content
if format not in config.extension_for_type:
log.error( 'Conversion not supported for this format',
Format=format )
format = config.extension_for_type[format]
abs_filename = os.path.join( self.task().folder, new_software )
content_to_edit = markdown.transform_code( content,
_transform_software( self.software, new_software ) )
instructions = f'''
# This file should help you write a solution to the task {self.task_name} in {new_software}
## Before you begin
1. Read the full description of the "{self.task_name}" task. You can find it
[on this page]({self.task().url}) of the How to Data website.
2. Read the solution below, which was written using {self.software}. You can see
that same solution displayed more nicely on the same page from the site,
or you can view it in isolation [on this page]({self.url}).
## Doing the work
'''[1:]
if self.software in software.using_code and new_software in software.using_code:
instructions += f'''
1. Your job is to replace the old code in {self.software} with new code in {new_software}.
This may also require altering some of the that sits outside code blocks.
'''[1:]
elif self.software in software.using_code:
instructions += f'''
1. Your job is to replace the code blocks with written instructions, because
{self.software} is a programming language but {new_software} does not use code.
This requires not only removing all code blocks, but also extending the text that
currently sits outside the code blocks to include new, step-by-step instructions
of what to click or type in {new_software}.
'''[1:]
elif new_software in software.using_code:
instructions += f'''
1. Your job is to replace the instructions about {self.software} with code blocks in
{new_software}, plus some explanations about what the code you add is doing.
'''[1:]
else:
instructions += f'''
1. Your job is to replace the instructions that were written for {self.software}
with instructions for {new_software}.
'''[1:]
instructions += f'''
2. *Try to change as little as possible,* so that the two solutions are
as similar as they can reasonably be. This lets readers of the website
more easily compare the two side-by-side. Obviously, you'll have to remove any
reference to {self.software}, but make your result as parallel to the original
as you can.
3. Use the same example problems whenever possible, and ensure that you get
the same result in {new_software} as the original solution shows for {self.software}.
## How to submit your finished solution
1. Delete these instructions, that is, everything up to the horizontal line
you see below.'''
if format == 'ipynb':
instructions += \
' That should mean deleting just this one, big markdown cell.'
instructions += f'''\n
2. Email this file to the site maintainer, or if you are familiar with git and
GitHub, you can commit this file and then create a pull request.
-------------------
'''
return markdown.export( instructions + content_to_edit, abs_filename, format )