Update configs
cangermueller committed Nov 17, 2024
commit dd3b2cf
man bash -> Parameter Expansion
${!v*} // expand variable name
${v:-value} // use default value if v is unset or null (does not assign; ${v:=value} assigns)
${v:?msg} // print msg and exit if unset or null
${v?msg} // print msg if unset; ${1?"First parameter must be set"}
${file.tar.gz%.*} // file.tar
${file.tar.gz%%.*} // file
${file.tar.gz#*.} // tar.gz
expr && expr // and; a -
expr || expr // or
!expr // not

Check if list contains value
if [[ ${values[*]} =~ (^|[[:space:]])$value($|[[:space:]]) ]] then
echo 'yes'
echo 'no'
## context
with flagsaver.flagsaver(...):
with flagsaver.as_parsed(...):

# --undefok=flagname1,flagname2
Avoid throwing an error if --flagname1 is used although no corresponding absl
flag is defined
assertContainsSubset(expected_subset, actual_superset)
assertContainsSubsequence(list, sub_list) // [0, 1, 0, 2, 0, 3] contains [1, 2, 3]
assertContainsExactSubsequence(list, sub_list) // [0, 1, 0, 2, 0, 3] contains [1, 0, 2]
assertDataclassEqual(d1, d2) // compare dataclasses; nicer error message

## logs
with self.assertLogs(level=logging.INFO) as logs:
* Yields pairs that exist in *either* of the two tables; values of one of the tables can be missing

# IO

d.popitem() // removes and returns the last inserted item

# deque
* See queue.txt for more details
d = deque([1, 2, 3])
append(x) // append single item right
# match/case

match value:
Expand Down Expand Up @@ -206,3 +207,40 @@ match value:
print('some int')
case _:

## Examples

class Foo:

def __init__(self, a):
self.a = a

class Foo2:
__match_args__ = ('a', )

def __init__(self, a):
self.a = a

def match_type(x: Any):
match x:
case int() if x < 3:
print('Int < 3')
case int(4):
print('Int 4')
case str() if len(x) < 3: # conditional; if is "guard"
print('Str < 3')
case dict({'a': a}): # Matches any dict that contains 'a'
print('Dict {a: %d}' % a)
case dict({'b': b, 'c': c}):
print('Dict {a: %d, b: %d}' % (b, c))
case float() | str():
print('Float or str')
case Foo(): # An instance of this class; alternative to isinstance
case Foo2(x): # capturing x requires defining __match_args__
print('Foo2 with %d' % x)
case _:
# Heapq
* Operates on a list; not separate data structure!
* items are compared by usual comparator, e.g 1 > 2; (1, 'a') < (3, 'b')
* items are compared by usual comparator, e.g 1 < 2; (1, 'b') < (3, 'a')
* Use dataclass for storing special structures:

## Random generator
rng = np.random.default_rng(0)
rng = np.random.default_rng(0 | None)
isinstance(rng, np.random.Generator) == True
isinstance(rng, np.random.RandomState) == False
cat(sep=':', na_rep='') // concatenate values
sep= // field separator
na_rep=None // if None, no sep is inserted / missing values are ignored
wrap(max_width) // inserts \n after max_width

# Delete, exclude, drop columns
Expand All @@ -84,6 +85,8 @@ df.corr(...) // computes correlation between all columns
df.apply(fun, axis=0) // apply function over axis
axis=0 // over rows -> return value for each column
axis=1 // over columns -> return value for each row
result_type='expand' // broadcast dict / list to DataFrame // apply function to every element of DataFrame

# sample DataFrame
df.sample(n, ...)
Expand Down Expand Up @@ -522,12 +525,6 @@ pd.concat([d1, d2], axis=0, ...) // join data.frames on indices
keys=['d1', 'd2'] // create Multiindex with these outer level values
names=['outer', 'inner'] // names of levels in Multiindex

## Stacking two frames like np.concat
* Make sure that the index is equal
a.index = range(len(a))
b.index = a.index
ab = pd.concat((a, b), axis=1)

# Joining 2 DataFrames on COLUMNS
* Join by COLUMNS! (not index as pd.concat)
Expand Down Expand Up @@ -708,6 +705,7 @@ df.values // numpy array used internally
df.values.nbytes // memory usage
df.sort_values('column', ascending=True, inplace=False) // sort by column(s)
ascending=[True, False] // multiple columns
df.sort_values([('level1_name', 'level2_name')]) // sort by multiindex
df.sort_index(axis=0, inplace=False) // sort by row index
df.sort_index(axis=1) // sort columns by names
df.sort_index(by='column') // sort by column -> like sort
Expand Down Expand Up @@ -988,12 +986,6 @@ series = to_datetime(pd.Series(['2010-01-01', '2010-02-01', '2010-03-01']))
series.dt // returns object to access datetime properties

# Applying functions
.apply(lambda row: row + 1, axis=1) // apply over rows or columns
.applymap(lambda x: x + 1) // apply on single elements
.pipe(lambda df: df.iloc[:2]) // apply on entire dataframe

# Replacing columns
df.set_axis(new_columns, 1)

Expand Down Expand Up @@ -1051,7 +1043,6 @@
# Maps values to a CSS style
max_value = values.max()
return ['background-color:yellow' if v == max_value else '' for v in values]
.applymap(fn) // applies to values independently
.set_caption(caption) // title
.bar(cmap='viridis', width=100, ...)
.set_properties(**{'text-align': 'center'}) // set CSS attributes
Expand All @@ -1073,3 +1064,26 @@ def show_corr(corr, metric='spearmanr', absolute=True, caption=None):
[{'selector': 'th', 'props': [('border', '1px solid black')]}])

# to_dict / from_dict
df.to_dict(orient='records') // rows as dicts [{'c1': v1, 'c2': v2, ...}]
df.to_dict(orient='list') // columns as lists {'c1': [v1, v2, ...], ...}
pd.DataFrame(dicts) // from_dict

# Column of dicts -> DataFrame

# Convert to list / tuples
df.itertuples(index=False, name=None)

# Iterating over rows
index, series = df.iterrows() // returns Series; dtype of Series can change over rows
named_tuples = df.itertuples(index=True) // returns NamedTuples; more efficient
getattr(named_tuple, 'column') // access by string
named_tuple._asdict()['column'] // access by string, less efficient than getattr
series = df.items() // iterate over columns
df.to_dict(orient='records') // returns list instead of Iterator (inefficient)
# Queue

## collections.deque
* queue: only supports removing elements from the back
* deque: also supports removing elements from the front

import collections

q = collections.deque([1, 2, 3], maxlen=None)
q.extend([5, 6])
while q:

## queue.Queue
* Used for synchronizing threads
* thread-safe
* Use collections.deque otherwise

import queue

q = queue.Queue(maxsize=0)
while not q.empty():

# Priority queue

## heapq
* see heapq.txt

q = []
heapq.heappush(q, 3)
heapq.heappush(q, 1)
heapq.heappush(q, 2)

q = [3, 1, 2]
heapq.heapify(q) // in-place

while q:

## queue.PriorityQueue
* For threads (like queue.Queue)
* Wrapper around heapq

pq = queue.PriorityQueue()

pq.put((1, "Task 1"))
pq.put((3, "Task 3"))
pq.put((2, "Task 2"))

print(pq.get()) # Output: (1, 'Task 1')
print(pq.get()) # Output: (2, 'Task 2')
print(pq.get()) # Output: (3, 'Task 3')
re.findall(pattern, string) // all matches as str in array (see below)
len(re.findall(...)) // count
re.split(p, s) // split by p; s.split(p) does not support re

# re.sub
re.sub(p, r, s) // subst ALL p in s by r and returns substituted string
re.sub(r'(.*\.)(solver)$', r'\g<1>\g<2>_cls', idx) // reference groups
re.sub(r'(.)\1{2,}', r'\1', 'aaabbbccc') // replace duplicate chars
re.subn() // like sub, but return number of replacements
re.sub(r'(.*\.)(solver)$', r'\g<1>\g<2>_cls', idx) // reference groups
* use \g<1> instead of \1 since '\123' does not replace by (group1)23!
// Backslash must be escaped in the replacement, not with re.escape!
re.sub(regex, r'\g<1>' + repl.replace('\\', '\\\\') + r'\g<2>', text)

# Groups
Expand Down Expand Up @@ -51,13 +56,20 @@ d['start'], d['stop']
\S: opposite of \s [^\s]
\w: alphanumeric [a-zA-Z0-9_].
\W: opposite of \w
\b: Word boundary (end/beginning of string; before/after whitespace)

# Flags
r'(?ism)...' // to express options in regex
re.I | re.IGNORECASE // insensitive
re.S | re.DOTALL // . matches also \n (it matches everything but \n by default)
re.M | re.MULTILINE // ^ matches also is beginning of a line, not only the entire string
re.M | re.MULTILINE // ^ matches also beginning of a line, not only the entire string
re.X | re.VERBOSE // ignores any whitespaces (' ' or \n) except '[ ]' and '\ ';
// # must be escaped!
\d + # digits
[ ] \ # two spaces
''', re.X) == re.compile(r'\d+[ ] ')

# Matching string over multiple lines
* `.` does not match \n by default
Expand Down Expand Up @@ -91,32 +103,55 @@ re.findall('[\w]+', source)

# Capture groups
(?P<name>regex): named capture group (for m.groupdict())
(?:regex): group that is ignored (not in m.groups() / m.groupdict())
(?=regex): positive lookahead; group that is not included
* re.search(r'[a-z]+(?=\d+)', ' abc213 ').group() == 'abc'

## Exclude a sub-string that must exist (positive lookahead assertion)
re.search(r'[^-]+(?=bar)', 'abcbar').group() -> abc; group() == group(0)
re.search(r'[^-]+(?=bar)', 'abcba') // fails since string does not end with 'bar'

## Ignore a group (non-capturing group)
re.match('(set|let) var = (\\w+|\\d+)', 'set var = 1').groups() -> ('set', '1')
re.match('(?:set|let) var = (\\w+|\\d+)', 'set var = 1').groups() -> ('1',)

## Named groups
## ?P<name> Named groups
m = re.fullmatch(r'(--)?(?P<name>[^=]+)(=(?P<value>[^=]+))?', flag_string)
m.group('name')
m.group('value') // None if missing
m.groups() // returns tuple of values
m.groupdict() // returns dict mapping group names to group values
m.group('name')

## ?: Ignore a group (non-capturing group)
re.match('(set|let) var = (\\w+|\\d+)', 'set var = 1').groups() -> ('set', '1')
re.match('(?:set|let) var = (\\w+|\\d+)', 'set var = 1').groups() -> ('1',)

## ?= Exclude a sub-string that must exist (positive lookahead assertion)
re.search(r'[^-]+(?=bar)', 'abcbar').group() -> abc; group() == group(0)
re.search(r'[^-]+(?=bar)', 'abcba') // fails since string does not end with 'bar'

## Replace digits that are followed by characters with 'DIGEST|' (not the characters)
* Use !lookahead! for things that are not replaced
re.sub(r'\d+(?=[a-z]+)', 'DIGEST|', '123abc') == 'DIGEST|abc'

## Replace characters that are preceded by digits with '|CHARS' (not the digits)
* Use !lookbehind! expression
* Lookbehind expression must have fixed width; \d+ does not work, only \d
re.sub(r'(?<=\d)[a-z]+', '|CHARS', '123abc') == '123|CHARS'

# Escaping / raw string
* Enables using backslash '\' in string without escaping
* Use in regex when it is necessary to match '\'
print('123\345') // 123å
print(r'123\345') // 123\345

# Raw string (r'', r"", r""" """)
* \ has no effect
print(r'\n') == '\n'
print(r'\\n') == '\\n'
display(r'\n') == '\\n' // display (__repr__) escapes backslashes!

re.fullmatch(r'\d+\\\d+', r'123\456') # matches
re.fullmatch('\d+\\\d+', r'123\456') # does not match (re is not r)
re.fullmatch('\d+\\\\\d+', r'123\456') # matches
re.fullmatch('\d+ \d+', '123 456') # r not required for \d, \s, or \w

# Triple quote
* Allows line breaks (newline \n)
* Newline can be escaped unless in raw string
a''') == 'a'

a''') == '\
