Skip to content

Commit

Permalink
Update configs
Browse files Browse the repository at this point in the history
  • Loading branch information
cangermueller committed Nov 17, 2024
1 parent b4596ab commit dd3b2cf
Show file tree
Hide file tree
Showing 17 changed files with 270 additions and 83 deletions.
2 changes: 1 addition & 1 deletion bash/general.txt
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ args=${args[*]}
man bash -> Parameter Expansion
${!v*} // expand variable name
${v:-value} // use default value if unset or empty (does not assign; ${v:=value} assigns)
${v:?msg} // print msg if unset
${v?msg} // print msg if unset; ${1?"First parameter must be set"}
${file.tar.gz%.*} // file.tar
${file.tar.gz%%.*} // file
${file.tar.gz#*.} // tar.gz
Expand Down
16 changes: 16 additions & 0 deletions bash/if.txt
Original file line number Diff line number Diff line change
Expand Up @@ -66,3 +66,19 @@ a == 'test' // works, even if a=
expr && expr // and; a -
expr || expr // or
!expr // not


Check if list contains value
============================
values=(
"value1"
"value2"
"value3"
"value4"
)
value="value2"
if [[ ${values[*]} =~ (^|[[:space:]])$value($|[[:space:]]) ]]; then
echo 'yes'
else
echo 'no'
fi
5 changes: 5 additions & 0 deletions python/abslflags.txt
Original file line number Diff line number Diff line change
Expand Up @@ -70,3 +70,8 @@ from absl.testing import flagsaver
## context
with flagsaver.flagsaver(...):
with flagsaver.as_parsed(...):


# --undefok=flagname1,flagname2
Avoid throwing an error if --flagname1 is used although no corresponding absl
flag is defined
1 change: 1 addition & 0 deletions python/absltest.txt
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ assertBetween(value, min, max) // checks if value is in range (max inclusive)
assertContainsSubset(expected_subset, actual_superset)
assertContainsSubsequence(list, sub_list) // [0, 1, 0, 2, 0, 3] contains [1, 2, 3]
assertContainsExactSubsequence(list, sub_list) // [0, 1, 0, 2, 0, 3] contains [1, 0, 2]
assertDataclassEqual(d1, d2) // compare dataclasses; nicer error message

## logs
with self.assertLogs(level=logging.INFO) as logs:
Expand Down
1 change: 0 additions & 1 deletion python/beam.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@ CoGroupByKey() // Groups only [(k1, v1), (k1, v2), (k3, v3)] -> [(k1, [v1, v2],
* Yields pairs that exist in *either* of the two tables; values of one of the tables can be missing



# IO

io.ReadFromText(
Expand Down
1 change: 1 addition & 0 deletions python/collections.txt
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ d = OrderedDict([['a', 1], ['b', 2]]) // Initialize ordered via list of tuples
d.popitem() // removes and returns the last inserted item

# deque
* See queue.txt for more details
d = deque([1, 2, 3])
append(x) // append single item right
appendleft(x)
Expand Down
38 changes: 38 additions & 0 deletions python/general.txt
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,7 @@ values: tuple[int, ...]


# match/case
henryiii.github.io/level-up-your-python/notebooks/2.8%20Pattern%20Matching
docs.python.org/3/whatsnew/3.10.html#pep-634-structural-pattern-matching

match value:
Expand Down Expand Up @@ -206,3 +207,40 @@ match value:
print('some int')
case _:
print('else')


## Examples

class Foo:

def __init__(self, a):
self.a = a


class Foo2:
__match_args__ = ('a', )

def __init__(self, a):
self.a = a


def match_type(x: Any):
match x:
case int() if x < 3:
print('Int < 3')
case int(4):
print('Int 4')
case str() if len(x) < 3: # conditional; if is "guard"
print('Str < 3')
case dict({'a': a}): # Matches any dict that contains 'a'
print('Dict {a: %d}' % a)
case dict({'b': b, 'c': c}):
print('Dict {b: %d, c: %d}' % (b, c))
case float() | str():
print('Float or str')
case Foo(): # An instance of this class; alternative to isinstance
print('Foo')
case Foo2(x): # capturing x requires defining __match_args__
print('Foo2 with %d' % x)
case _:
print('Unknown')
2 changes: 1 addition & 1 deletion python/heapq.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# Heapq
* Operates on a list; not separate data structure!
* items are compared by usual comparator, e.g 1 > 2; (1, 'a') < (3, 'b')
* items are compared by usual comparator, e.g 1 < 2; (1, 'b') < (3, 'a')
* Use dataclass for storing special structures: https://docs.python.org/3/library/heapq.html#priority-queue-implementation-notes


Expand Down
2 changes: 1 addition & 1 deletion python/numpy.txt
Original file line number Diff line number Diff line change
Expand Up @@ -322,7 +322,7 @@ random.choice(array, size, replace=False) // sample (without) replacement from


## Random generator
rng = np.random.default_rng(0)
rng = np.random.default_rng(0 | None)
rng.(uniform|integers|normal|choice|permutation|...)
isinstance(rng, np.random.Generator) == True
isinstance(rng, np.random.RandomState) == False
Expand Down
40 changes: 27 additions & 13 deletions python/pandas.txt
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ decode('utf') // bytes -> unicode
cat(sep=':', na_rep='') // concatenate values
sep= // field separator
na_rep=None // if None, no sep is inserted / missing values are ignored
wrap(max_width) // inserts \n after max_width


# Delete, exclude, drop columns
Expand All @@ -84,6 +85,8 @@ df.corr(...) // computes correlation between all columns
df.apply(fun, axis=0) // apply function over axis
axis=0 // over rows -> return value for each column
axis=1 // over columns -> return value for each row
result_type='expand' // broadcast dict / list to DataFrame
df.map // apply function to every element of DataFrame

# sample DataFrame
df.sample(n, ...)
Expand Down Expand Up @@ -522,12 +525,6 @@ pd.concat([d1, d2], axis=0, ...) // join data.frames on indices
keys=['d1', 'd2'] // create Multiindex with these outer level values
names=['outer', 'inner'] // names of levels in Multiindex

## Stacking two frames like np.concat
* Make sure than index is equal
a.index = range(len(a))
b.index = a.index
ab = pd.concat((a, b), axis=1)


# Joining 2 DataFrames on COLUMNS
* Join by COLUMNS! (not index as pd.concat)
Expand Down Expand Up @@ -708,6 +705,7 @@ df.values // numpy array used internally
df.values.nbytes // memory usage
df.sort_values('column', ascending=True, inplace=False) // sort by column(s)
ascending=[True, False] // multiple columns
df.sort_values([('level1_name', 'level2_name')]) // sort by multiindex
df.sort_index(axis=0, inplace=False) // sort by row index
df.sort_index(axis=1) // sort columns by names
df.sort_index(by='column') // sort by column -> like sort
Expand Down Expand Up @@ -988,12 +986,6 @@ series = to_datetime(pd.Series(['2010-01-01', '2010-02-01', '2010-03-01']))
series.dt // returns object to access datetime properties


# Applying functions
.apply(lambda row: row + 1, axis=1) // apply over rows or columns
.applymap(lambda x: x + 1) // apply on single elements
.pipe(lambda df: df.iloc[:2]) // apply on entire dataframe


# Replacing columns
df.set_axis(new_columns, 1)

Expand Down Expand Up @@ -1051,7 +1043,6 @@ df.style.
# Maps values to a CSS style
max_value = values.max()
return ['background-color:yellow' if v == max_value else '' for v in values]
.applymap(fn) // applies to values independently
.set_caption(caption) // title
.bar(cmap='viridis', width=100, ...)
.set_properties(**{'text-align': 'center'}) // set CSS attributes
Expand All @@ -1073,3 +1064,26 @@ def show_corr(corr, metric='spearmanr', absolute=True, caption=None):
.set_table_styles(
[{'selector': 'th', 'props': [('border', '1px solid black')]}])
)


# to_dict / from_dict
df.to_dict(orient='records') // rows as dicts [{'c1': v1, 'c2': v2, ...}]
df.to_dict(orient='list') // columns as lists {'c1': [v1, v2, ...], ...}
pd.DataFrame(dicts) // from_dict


# Column of dicts -> DataFrame
pd.json_normalize(df['dict'])

# Convert to list / tuples
df.values.tolist()
df.itertuples(index=False, name=None)


# Iterating over rows
for index, series in df.iterrows() // yields (index, Series) per row; dtype of Series can change over rows
for named_tuple in df.itertuples(index=True) // yields NamedTuples; more efficient
getattr(named_tuple, 'column') // access by string
named_tuple._asdict()['column'] // access by string, less efficient than getattr
for name, series in df.items() // iterate over columns as (column name, Series)
df.to_dict(orient='records') // returns list instead of Iterator (inefficient)
58 changes: 58 additions & 0 deletions python/queue.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
# Queue

## collections.deque
* queue (FIFO): elements are added at the back and removed from the front
* deque (double-ended queue): supports adding and removing elements at both ends

import collections

q = collections.deque([1, 2, 3], maxlen=None)
q.append(4)
q.extend([5, 6])
while q:
print(q.popleft())


## queue.Queue
* Used for synchronizing threads
* thread-safe
* Use collections.deque otherwise

import queue

q = queue.Queue(maxsize=0)
q.put(1)
q.put(2)
while not q.empty():
print(q.get())


# Priority queue

## heapq
* see heapq.txt

q = []
heapq.heappush(q, 3)
heapq.heappush(q, 1)
heapq.heappush(q, 2)

q = [3, 1, 2]
heapq.heapify(q) // in-place

while q:
print(heapq.heappop(q))

## queue.PriorityQueue
* For threads (like queue.Queue)
* Wrapper around heapq

pq = queue.PriorityQueue()

pq.put((1, "Task 1"))
pq.put((3, "Task 3"))
pq.put((2, "Task 2"))

print(pq.get()) # Output: (1, 'Task 1')
print(pq.get()) # Output: (2, 'Task 2')
print(pq.get()) # Output: (3, 'Task 3')
67 changes: 51 additions & 16 deletions python/re.txt
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,15 @@ for m in re.finditer(pattern, string) // like search, but iter of all hits
re.findall(pattern, string) // all matches as str in array (see below)
len(re.findall(...)) // count
re.split(p, s) // split by p; s.split(p) does not support re


# re.sub
re.sub(p, r, s) // subst ALL p in s by r and returns substituted string
re.sub(r'(.*\.)(solver)$', r'\g<1>\g<2>_cls', idx) // reference groups
re.sub(r'(.)\1{2,}', r'\1', 'aaabbbccc') // replace duplicate chars
re.subn() // like sub, but returns tuple (new_string, number_of_replacements)
re.sub(r'(.*\.)(solver)$', r'\g<1>\g<2>_cls', idx) // reference groups
* use \g<1> instead of \1 since '\123' does not replace by (group1)23!
// Backslash must be escaped in the replacement, not with re.escape!
re.sub(regex, r'\g<1>' + repl.replace('\\', '\\\\') + r'\g<2>', text)


# Groups
Expand Down Expand Up @@ -51,13 +56,20 @@ d['start'], d['stop']
\S: opposite of \s [^\s]
\w: alphanumeric [a-zA-Z0-9_].
\W: opposite of \w
\b: Word boundary (empty string at beginning/end of a word, i.e. between \w and \W or string start/end)


# Flags
r'(?ism)...' // to express options in regex
re.I | re.IGNORECASE // insensitive
re.S | re.DOTALL // . matches also \n (it matches everything but \n by default)
re.M | re.MULTILINE // ^ matches also is beginning of a line, not only the entire string
re.M | re.MULTILINE // ^ matches also beginning of a line, not only the entire string
re.X | re.VERBOSE // ignores any whitespaces (' ' or \n) except '[ ]' and '\ ';
// # must be escaped!
re.compile(r'''
\d + # digits
[ ] \ # two spaces
''', re.X) == re.compile(r'\d+[ ] ')

# Matching string over multiple lines
* `.` does not match \n by default
Expand Down Expand Up @@ -91,32 +103,55 @@ re.findall('[\w]+', source)


# Capture groups
(?P<name>regex): named capture group (for m.groupdict())
(?:regex): group that is ignored (not in m.groups() / m.groupdict())
(?=regex): positive lookahead; group that is not included in m.group()
* re.search(r'[a-z]+(?=\d+)', ' abc213 ').group() == 'abc'

## Exclude a sub-string that must exist (positive lookahead assertion)
re.search(r'[^-]+(?=bar)', 'abcbar').group() -> abc; group() == group(0)
re.search(r'[^-]+(?=bar)', 'abcba') // fails since string does not end with 'bar'

## Ignore a group (non-capturing group)
re.search(r'(set|let) var = (\\w+|\\d+)', 'set var = 1') -> ('set', '1')
re.search(r'(?:set|let) var = (\\w+|\\d+)', 'set var = 1') -> ('1',)

## Named groups
## ?P<name> Named groups
m = re.fullmatch(r'(--)?(?P<name>[^=]+)(=(?P<value>[^=]+))?', flag_string)
m.group('name')
m.group('value') // None if missing
m.groups() // returns tuple of values
.m.groupdict() returns dict mapping group names to group values
.m.group('name')

## ?: Ignore a group (non-capturing group)
re.search(r'(set|let) var = (\w+|\d+)', 'set var = 1').groups() -> ('set', '1')
re.search(r'(?:set|let) var = (\w+|\d+)', 'set var = 1').groups() -> ('1',)

## ?= Exclude a sub-string that must exist (positive lookahead assertion)
re.search(r'[^-]+(?=bar)', 'abcbar').group() -> abc; group() == group(0)
re.search(r'[^-]+(?=bar)', 'abcba') // returns None since no match is followed by 'bar'

## Replace digits that are followed by characters with 'DIGITS|' (keeping the characters)
* Use a !lookahead! for the part that must not be replaced
re.sub(r'\d+(?=[a-z]+)', 'DIGITS|', '123abc') == 'DIGITS|abc'

## Replace characters that are preceded by digits with '|CHARS' (keeping the digits)
* Use a !lookbehind! expression
* Lookbehind expression must have fixed width; \d+ does not work, only \d
re.sub(r'(?<=\d)[a-z]+', '|CHARS', '123abc') == '123|CHARS'

# Escaping / raw string
* Enables using backslash '\' in string without escaping
* Use in regex when it is necessary to match '\'
print('123\345') // 123å
print(r'123\345') // 123\345

# Raw string (r'', r"", r""" """)
* \ has no effect
print(r'\n') == '\n'
print(r'\\n') == '\\n'
display(r'\n') == '\\n' // display (__repr__) escapes backslashes!

re.fullmatch(r'\d+\\\d+', r'123\456') # matches
re.fullmatch('\d+\\\d+', r'123\456') # does not match (re is not r)
re.fullmatch('\d+\\\\\d+', r'123\456') # matches
re.fullmatch('\d+ \d+', '123 456') # r not required for \d, \s, or \w

# Triple quote
* Allows line breaks (newline \n)
* Newline can be escaped unless in raw string
print('''\
a''') == 'a'

print(r'''\
a''') == '\
a'
1 change: 0 additions & 1 deletion python/strings.py

This file was deleted.

Loading

0 comments on commit dd3b2cf

Please sign in to comment.