Update configs

cangermueller · Nov 17, 2024 · dd3b2cf · dd3b2cf
1 parent b4596ab
commit dd3b2cf
Show file tree

Hide file tree

Showing 17 changed files with 270 additions and 83 deletions.
diff --git a/bash/general.txt b/bash/general.txt
@@ -124,7 +124,7 @@ args=${args[*]}
 man bash -> Parameter Expansion
 ${!v*}  // expand variable name
 ${v:-value} // use value if unset or null (does not assign; ${v:=value} assigns)
-${v:?msg}  // print msg if unset
+${v?msg}  // print msg if unset; ${1?"First parameter must be set"}
 ${file.tar.gz%.*} // file.tar
 ${file.tar.gz%%.*} // file
 ${file.tar.gz#*.} // tar.gz

diff --git a/bash/if.txt b/bash/if.txt
@@ -66,3 +66,19 @@ a == 'test' // works, even if a=
 expr && expr  // and; a -
 expr || expr // or
 !expr // not
+
+
+Check if list contains value
+============================
+values=(
+  "value1"
+  "value2"
+  "value3"
+  "value4"
+)
+value="value2"
+if [[ ${values[*]} =~ (^|[[:space:]])$value($|[[:space:]]) ]] then
+  echo 'yes'
+else
+  echo 'no'
+fi
diff --git a/python/abslflags.txt b/python/abslflags.txt
@@ -70,3 +70,8 @@ from absl.testing import flagsaver
 ## context
 with flagsaver.flagsaver(...):
 with flagsaver.as_parsed(...):
+
+
+# --undefok=flagname1,flagname2
+Avoid throwing an error if --flagname1 is used although no corresponding absl
+flag is defined
diff --git a/python/absltest.txt b/python/absltest.txt
@@ -57,6 +57,7 @@ assertBetween(value, min, max) // checks if value is in range (max inclusive)
 assertContainsSubset(expected_subset, actual_superset)
 assertContainsSubsequence(list, sub_list) // [0, 1, 0, 2, 0, 3] contains [1, 2, 3]
 assertContainsExactSubsequence(list, sub_list) // [0, 1, 0, 2, 0, 3] contains [1, 0, 2]
+assertDataclassEqual(d1, d2) // compare dataclasses; nicer error message
 
 ## logs
 with self.assertLogs(level=logging.INFO) as logs:

diff --git a/python/beam.txt b/python/beam.txt
@@ -10,7 +10,6 @@ CoGroupByKey()  // Groups only [(k1, v1), (k1, v2), (k3, v3)] -> [(k1, [v1, v2],
   * Yields pairs that exist in *either* of the two tables; values of one of the tables can be missing
 
 
-
 # IO
 
 io.ReadFromText(

diff --git a/python/collections.txt b/python/collections.txt
@@ -11,6 +11,7 @@ d = OrderedDict([['a', 1], ['b', 2]]) // Initialize ordered via list of tuples
 d.popitem() // removed last inserted item
 
 # deque
+* See queue.txt for more details
 d = deque([1, 2, 3])
 append(x) // append single item right
 appendleft(x)

diff --git a/python/general.txt b/python/general.txt
@@ -179,6 +179,7 @@ values: tuple[int, ...]
 
 
 # match/case
+henryiii.github.io/level-up-your-python/notebooks/2.8%20Pattern%20Matching
 docs.python.org/3/whatsnew/3.10.html#pep-634-structural-pattern-matching
 
 match value:
@@ -206,3 +207,40 @@ match value:
     print('some int')
   case _:
     print('else')
+
+
+## Examples
+
+class Foo:
+
+  def __init__(self, a):
+    self.a = a
+
+
+class Foo2:
+  __match_args__ = ('a', )
+
+  def __init__(self, a):
+    self.a = a
+
+
+def match_type(x: Any):
+  match x:
+    case int() if x < 3:
+      print('Int < 3')
+    case int(4):
+      print('Int 4')
+    case str() if len(x) < 3: # conditional; if is "guard"
+      print('Str < 3')
+    case dict({'a': a}):  # Matches any dict that contains 'a'
+      print('Dict {a: %d}' % a)
+    case dict({'b': b, 'c': c}):
+      print('Dict {b: %d, c: %d}' % (b, c))
+    case float() | str():
+      print('Float or str')
+    case Foo():   # An instance of this class; alternative to isinstance
+      print('Foo')
+    case Foo2(x):  # capturing x requires defining __match_args__
+      print('Foo2 with %d' % x)
+    case _:
+      print('Unknown')
diff --git a/python/heapq.txt b/python/heapq.txt
@@ -1,6 +1,6 @@
 # Heapq
 * Operates on a list; not separate data structure!
-* items are compared by usual comparator, e.g 1 > 2; (1, 'a') < (3, 'b')
+* items are compared by usual comparator, e.g. 1 < 2; (1, 'b') < (3, 'a')
 * Use dataclass for storing special structures: https://docs.python.org/3/library/heapq.html#priority-queue-implementation-notes
 
 

diff --git a/python/numpy.txt b/python/numpy.txt
@@ -322,7 +322,7 @@ random.choice(array, size, replace=False)  // sample (without) replacement from
 
 
 ## Random generator
-rng = np.random.default_rng(0)
+rng = np.random.default_rng(0 | None)
 rng.(uniform|integer|normal|choice|permutation|...)
 isinstance(rng, np.random.Generator) == True
 isinstance(rng, np.random.RandomState) == False

diff --git a/python/pandas.txt b/python/pandas.txt
@@ -63,6 +63,7 @@ decode('utf') // bytes -> unicode
 cat(sep=':', na_rep='') // concatenate values
   sep=  // field separator
   na_rep=None // if None, no sep is inserted / missing values are ignored
+wrap(max_width)  // inserts \n after max_width
 
 
 # Delete, exclude, drop columns
@@ -84,6 +85,8 @@ df.corr(...)  // computes correlation between all columns
 df.apply(fun, axis=0) // apply function over axis
   axis=0  // over rows -> return value for each column
   axis=1 // over columns -> return value for each row
+  result_type='expand' // broadcast dict / list to DataFrame
+df.map // apply function to every element of DataFrame
 
 # sample DataFrame
 df.sample(n, ...)
@@ -522,12 +525,6 @@ pd.concat([d1, d2], axis=0, ...) // join data.frames on indices
   keys=['d1', 'd2'] // create Multiindex with these outer level values
   names=['outer', 'inner']  // names of levels in Multiindex
 
-## Stacking two frames like np.concat
-* Make sure than index is equal
-a.index = range(len(a))
-b.index = a.index
-ab = pd.concat((a, b), axis=1)
-
 
 # Joining 2 DataFrames on COLUMNS
 * Join by COLUMNS! (not index as pd.concat)
@@ -708,6 +705,7 @@ df.values // numpy array used internally
 df.values.nbytes  // memory usage
 df.sort_values('column', ascending=True, inplace=False)  // sort by column(s)
   ascending=[True, False] // multiple columns
+df.sort_values([('level1_name', 'level2_name')]) // sort by multiindex
 df.sort_index(axis=0, inplace=False)  // sort by row index
 df.sort_index(axis=1) // sort columns by names
 df.sort_index(by='column')  // sort by column -> like sort
@@ -988,12 +986,6 @@ series = to_datetime(pd.Series(['2010-01-01', '2010-02-01', '2010-03-01']))
 series.dt // returns object to access datetime properties
 
 
-# Applying functions
-.apply(lambda row: row + 1, axis=1) // apply over rows or columns
-.applymap(lambda x: x + 1)  // apply on single elements
-.pipe(lambda df: df.iloc[:2])  // apply on entire dataframe
-
-
 # Replacing columns
 df.set_axis(new_columns, 1)
 
@@ -1051,7 +1043,6 @@ df.style.
     # Maps values to a CSS style
     max_value = values.max()
     return ['background-color:yellow' if v == max_value else '' for v in values]
-.applymap(fn) // applies to values independently
 .set_caption(caption) // title
 .bar(cmap='viridis', width=100, ...)
 .set_properties(**{'text-align': 'center'}) // set CSS attributes
@@ -1073,3 +1064,26 @@ def show_corr(corr, metric='spearmanr', absolute=True, caption=None):
       .set_table_styles(
           [{'selector': 'th', 'props': [('border', '1px solid black')]}])
   )
+
+
+# to_dict / from_dict
+df.to_dict(orient='records')  // rows as dicts [{'c1': v1, 'c2': v2, ...}]
+df.to_dict(orient='list') // columns as lists {'c1': [v1, v2, ...], ...}
+pd.DataFrame(dicts) // from_dict
+
+
+# Column of dicts -> DataFrame
+pd.json_normalize(df['dict'])
+
+# Convert to list / tuples
+df.values.tolist()
+df.itertuples(index=False, name=None)
+
+
+# Iterating over rows
+for index, series in df.iterrows() // yields (index, Series); dtype of Series can change over rows
+named_tuples = df.itertuples(index=True) // returns NamedTuples; more efficient
+  getattr(named_tuple, 'column') // access by string
+  named_tuple._asdict()['column'] // access by string, less efficient than getattr
+for name, series in df.items() // iterate over columns
+df.to_dict(orient='records') // returns list instead of Iterator (inefficient)
diff --git a/python/queue.txt b/python/queue.txt
@@ -0,0 +1,58 @@
+# Queue
+
+## collections.deque
+* queue: only supports removing elements from the back
+* deque: also supports removing elements from the front
+
+import collections
+
+q = collections.deque([1, 2, 3], maxlen=None)
+q.append(4)
+q.extend([5, 6])
+while q:
+  print(q.popleft())
+
+
+## queue.Queue
+* Used for synchronizing threads
+* thread-safe
+* Use collections.deque otherwise
+
+import queue
+
+q = queue.Queue(maxsize=0)
+q.put(1)
+q.put(2)
+while not q.empty():
+  print(q.get())
+
+
+# Priority queue
+
+## heapq
+* see heapq.txt
+
+q = []
+heapq.heappush(q, 3)
+heapq.heappush(q, 1)
+heapq.heappush(q, 2)
+
+q = [3, 1, 2]
+heapq.heapify(q) // in-place
+
+while q:
+  print(heapq.heappop(q))
+
+## queue.PriorityQueue
+* For threads (like queue.Queue)
+* Wrapper around heapq
+
+pq = queue.PriorityQueue()
+
+pq.put((1, "Task 1"))
+pq.put((3, "Task 3"))
+pq.put((2, "Task 2"))
+
+print(pq.get())  # Output: (1, 'Task 1')
+print(pq.get())  # Output: (2, 'Task 2')
+print(pq.get())  # Output: (3, 'Task 3')
diff --git a/python/re.txt b/python/re.txt
@@ -19,10 +19,15 @@ for m in re.finditer(pattern, string)  // like search, but iter of all hits
 re.findall(pattern, string) // all matches as str in array (see below)
 len(re.findall(...)) // count
 re.split(p, s)  // split by p; s.split(p) does not support re
+
+
+# re.sub
 re.sub(p, r, s) // subst ALL p in s by r and returns substituted string
-  re.sub(r'(.*\.)(solver)$', r'\g<1>\g<2>_cls', idx) // reference groups
-  re.sub(r'(.)\1{2,}', r'\1', 'aaabbbccc')  // replace duplicate chars
 re.subn() // like sub, but return number of replacements
+re.sub(r'(.*\.)(solver)$', r'\g<1>\g<2>_cls', idx) // reference groups
+  * use \g<1> instead of \1 since '\123' does not replace by (group1)23!
+// Backslash must be escaped in the replacement, not with re.escape!
+re.sub(regex, r'\g<1>' + repl.replace('\\', '\\\\') + r'\g<2>', text)
 
 
 # Groups
@@ -51,13 +56,20 @@ d['start'], d['stop']
 \S: opposite of \s [^\s]
 \w: alphanumeric [a-zA-Z0-9_].
 \W: opposite of \w
+\b: Word boundary (end/beginning of string; before/after whitespace)
 
 
 # Flags
 r'(?ism)...' // to express options in regex
 re.I | re.IGNORECASE // insensitive
 re.S | re.DOTALL // . matches also \n (it matches everything but \n by default)
-re.M | re.MULTILINE // ^ matches also is beginning of a line, not only the entire string
+re.M | re.MULTILINE // ^ matches also beginning of a line, not only the entire string
+re.X | re.VERBOSE // ignores any whitespaces (' ' or \n) except '[ ]' and '\ ';
+                  // # must be escaped!
+  re.compile(r'''
+   \d +  # digits
+   [ ] \   # two spaces
+     ''', re.X) == re.compile(r'\d+[ ] ')
 
 # Matching string over multiple lines
 * `.` does not match \n by default
@@ -91,32 +103,55 @@ re.findall('[\w]+', source)
 
 
 # Capture groups
+(?P<name>regex): named capture group (for m.groupdict())
+(?:regex): group that is ignored (not in m.groups() / m.groupdict())
+(?=regex): positive lookahead; group that is not included in m.group()
+  * re.search(r'[a-z]+(?=\d+)', ' abc213 ').group() == 'abc'
 
-## Exclude a sub-string that must exist (positive lookahead assertion)
-re.search(r'[^-]+(?=bar)', 'abcbar').group() -> abc; group() == group(0)
-re.search(r'[^-]+(?=bar)', 'abcba') // fails since string does not end with 'bar'
-
-## Ignore a group (non-capturing group)
-re.search(r'(set|let) var = (\\w+|\\d+)', 'set var = 1') ->  ('set', '1')
-re.search(r'(?:set|let) var = (\\w+|\\d+)', 'set var = 1') ->  ('1',)
 
-## Named groups
+## ?P<name> Named groups
 m = re.fullmatch(r'(--)?(?P<name>[^=]+)(=(?P<value>[^=]+))?', flag_string)
 m.group('name')
 m.group('value')  // None if missing
 m.groups()  // returns tuple of values
 .m.groupdict() returns dict mapping group names to group values
 .m.group('name')
 
+## ?: Ignore a group (non-capturing group)
+re.search(r'(set|let) var = (\\w+|\\d+)', 'set var = 1') ->  ('set', '1')
+re.search(r'(?:set|let) var = (\\w+|\\d+)', 'set var = 1') ->  ('1',)
+
+## ?= Exclude a sub-string that must exist (positive lookahead assertion)
+re.search(r'[^-]+(?=bar)', 'abcbar').group() -> abc; group() == group(0)
+re.search(r'[^-]+(?=bar)', 'abcba') // fails since string does not end with 'bar'
+
+## Replace digits that are followed by characters with 'DIGITS|' (not the characters)
+* Use !lookahead! for things that are not replaced
+re.sub(r'\d+(?=[a-z]+)', 'DIGITS|', '123abc') == 'DIGITS|abc'
+
+## Replace characters that are preceded by digits with '|CHARS' (not the digits)
+* Use !lookbehind! expression
+* Lookbehind expression must have fixed width; \d+ does not work, only \d
+re.sub(r'(?<=\d)[a-z]+', '|CHARS', '123abc')  == '123|CHARS'
 
-# Escaping / raw string
-* Enables using backslash '\' in string without escaping
-* Use in regex when it is necessary to match '\'
-print('123\345') // 123å
-print(r'123\345') // 123/345
 
+# Raw string (r'', r"", r""" """)
+* \ has no effect
+print(r'\n') == '\n'
+print(r'\\n') == '\\n'
+display(r'\n') == '\\n'   // display (__repr__) escapes backslashes!
 
 re.fullmatch(r'\d+\\\d+', r'123\456')  # matches
 re.fullmatch('\d+\\\d+', r'123\456')  # does not match (re is not r)
 re.fullmatch('\d+\\\\\d+', r'123\456')  # matches
 re.fullmatch('\d+ \d+', '123 456')  # r not required for \d, \s, or \w
+
+# Triple quote
+* Allows line breaks (newline \n)
+* Newline can be escaped unless in raw string
+print('''\
+a''') == 'a'
+
+print(r'''\
+a''') == '\
+a'
diff --git a/python/strings.py b/python/strings.py
Original file line number	Diff line number	Diff line change
Expand Up		@@ -10,7 +10,6 @@ CoGroupByKey() // Groups only [(k1, v1), (k1, v2), (k3, v3)] -> [(k1, [v1, v2],
		* Yields pairs that exist in either of the two tables; values of one of the tables can be missing



		# IO

		io.ReadFromText(
Expand Down