diff --git a/.readthedocs.yaml b/.readthedocs.yaml new file mode 100644 index 0000000..416e26b --- /dev/null +++ b/.readthedocs.yaml @@ -0,0 +1,29 @@ +# .readthedocs.yaml +# Read the Docs configuration file +# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details + +# Required +version: 2 + +# Set the version of Python and other tools you might need +build: + os: ubuntu-20.04 + tools: + python: "3.9" + # You can also specify other tool versions: + # nodejs: "16" + # rust: "1.55" + # golang: "1.17" + +# Build documentation in the docs/ directory with Sphinx +sphinx: + configuration: docs/conf.py + +# If using Sphinx, optionally build your docs in additional formats such as PDF +# formats: +# - pdf + +# Optionally declare the Python requirements required to build your docs +python: + install: + - requirements: docs/requirements.txt \ No newline at end of file diff --git a/docs/Example Notebook sgm-data.md b/docs/Example Notebook sgm-data.md new file mode 100644 index 0000000..47ff405 --- /dev/null +++ b/docs/Example Notebook sgm-data.md @@ -0,0 +1,1700 @@ +# Example Usage +```python +import sgmdata +import pandas +import numpy as np +from matplotlib import pyplot as plt +``` + + + +
+ + Loading BokehJS ... +
+ + + + +## Dask Background +The whole sgm-data library makes use 'dask' arrays, this allows for multiprocessing capabilities, +in a 'pandas-like' programming environment. The dask client is useful for very large datasets, it sets up workers to propogate your data and the operations upon it across several worker processes / nodes. For more about dask visit [their website](https://docs.dask.org/en/latest/) + +The below cell is optional and, if run, should only be run once per session. Dask will work quicker on small operations without the client (but you may run out of memory). + + +```python +from dask.distributed import Client +client = Client('dscheduler:8786') ## Can also run Client() for smaller jobs (could be faster). +``` + +## Searching for Data +You can find your data in the SGMLive database by using the SGMQuery module. The following documentation details the keywords that you can use to customize your search. + +### class SGMQuery(**kwargs): + +>**sample** *(str:required)* At minimum you'll need to provide the keyword "sample", corresponding the sample name in the database as a default this will grab all the data under that sample name. + +> **daterange** *(tuple:optional)* This can be used to sort through sample data by the day that it was acquired. This is designed to take a tuple of the form `("start-date", "end-date")` where the strings are of the form `"YYYY-MM-DD"`. You can also just use a single string of the same form, instead of a tuple, this will make the assumption that "end-date" == now(). + +> **data** *(bool:optional)* As a default (True) the SGMQuery object will try to load the the data from disk, if this is not the desired behaviour set data=False. + +> **user** *(str:optional:staffonly)* Can be used to select the username in SGMLive from which the sample query is performed. Not available to non-staff. + +> **processed** *(bool:optional)* Can be used to return the paths for the processed data (already interpolated) instead of the raw. 
You would generally set `data = False` for this option. + +#### Attributes +> **data** (object) By default the query will create an SGMData object containing your data, this can be turned off with the `data` keyword. + +> **paths** (list). Contains the local paths to your data (or processed_data if `processed=True`). + + +```python +%%time +sgmq = sgmdata.SGMQuery(sample="TeCN - C", user='kumarp') +``` + + +```python +sgm_data = sgmq.data +``` + +## Loading Data +Data can be loaded in as a single file path, or as a list of paths. The actual data is only loaded as a representation at first. By default SGMQuery creates an SGMData object under the property 'data'. + + +### class SGMData(file_paths, **kwargs): + +#### arg + +#### Keywords + +>**axes** *(str:optional)* At minimum you'll need to provide the keyword "sample", corresponding to the sample name in the database; as a default this will grab all the data under that sample name. + +> **daterange** *(tuple:optional)* This can be used to sort through sample data by the day that it was acquired. This is designed to take a tuple of the form `("start-date", "end-date")` where the strings are of the form `"YYYY-MM-DD"`. You can also just use a single string of the same form, instead of a tuple, this will make the assumption that "end-date" == now(). + +> **data** *(bool:optional)* As a default (True) the SGMQuery object will try to load the data from disk, if this is not the desired behaviour set data=False. + +> **user** *(str:optional:staffonly)* Can be used to select the username in SGMLive from which the sample query is performed. Not available to non-staff. + +> **processed** *(bool:optional)* Can be used to return the paths for the processed data (already interpolated) instead of the raw. You would generally set `data = False` for this option. 
+ +#### Functions + +#### Attributes +> **scans** (object) By default the query will create an SGMData object containing your data, this can be turned off with the `data` keyword. + +> **paths** (list). Contains the local paths to your data (or processed_data if `processed=True`). + + +The data is auto grouped into three classifications: "independent", "signals", and "other". You can view the data dictionary representation in a Jupyter cell by just invoking the SGMData() object. + + +```python +from sgmdata import preprocess +preprocess(sample="TeCN - C", user='arthurz', resolution=0.1, client=client) +``` + + Averaged 10 scans for TeCN - C + + + + + + + + + +## The SGMScan object +Contains a representation in memory of the data loaded from disk, plus any interpolated scans. + + +```python +sgm_data.scans['2022-02-08t14-56-25-0600'] +``` + + + + + + + + + + +
Sample Command Independent Signals Other
entry3 TeCN - C ['cscan', 'en', '270', '320', '60'] ['en'] ['aux1', 'clock', 'i0', 'pd', 'sdd1', 'sdd2', 'sdd3', 'sdd4', 'temp1', 'temp2', 'tey'] ['emission', 'image']
+ + + + +```python +sgm_data.scans['2021-08-18t04-14-47-0600'].entry1.command +``` + + + + + ['eemscan', 'en', '270', '2000', '60', '100'] + + + + +```python +sgm_data.scans['2021-08-18t04-14-47-0600'].entry1.independent['en'] +``` + + + + + + + + + +
+ + + + + + + + + + +
Array Chunk
Bytes 137.20 kiB 45.73 kiB
Shape (35123,) (11707,)
Count 5 Tasks 4 Chunks
Type float32 numpy.ndarray
+
+ + + + + + + + + + + + + + + + + + 35123 + 1 + +
+ + + + +```python +sgm_data.scans['2021-08-18t04-14-47-0600'].entry1.signals['tey'] +``` + + + + + + + + + +
+ + + + + + + + + + +
Array Chunk
Bytes 137.20 kiB 45.73 kiB
Shape (35123,) (11707,)
Count 5 Tasks 4 Chunks
Type float32 numpy.ndarray
+
+ + + + + + + + + + + + + + + + + + 35123 + 1 + +
+ + + +## Plotting Scan Data +For individual plots, you can visualize access the data, and plot it manually, or you can use the plot() routine. If interpolation step has already been performed, the data will be from that source. + + +```python +en = sgm_data.scans['2021-08-18t04-14-47-0600'].entry1.independent['en'] +tey = sgm_data.scans['2021-08-18t04-14-47-0600'].entry1.signals['tey'] + +plt.plot(en,tey) +``` + + + + + [] + + + + + +![png](output_15_1.png) + + + + +```python +arr = sgm_data.scans['2021-08-18t04-14-47-0600'].entry1.signals['sdd3'] +plt.imshow(arr, extent=[10,2560, 270, 2000]) +``` + + + + + + + + + + +![png](output_16_1.png) + + + + +```python +arr1 = sgm_data.scans['2021-08-18t04-14-47-0600'].entry1.signals['sdd1'] +arr2 = sgm_data.scans['2021-08-18t04-14-47-0600'].entry1.signals['sdd2'] +arr3 = sgm_data.scans['2021-08-18t04-14-47-0600'].entry1.signals['sdd3'] +arr4 = sgm_data.scans['2021-08-18t04-14-47-0600'].entry1.signals['sdd4'] + +fig, axs = plt.subplots(2, 2) +axs[0,0].imshow(arr1, extent=[10,2000, 270, 2000], vmin = 1, vmax = 1000) +axs[0,1].imshow(arr2, extent=[10,2000, 270, 2000], vmin = 1, vmax = 1000) +axs[1,0].imshow(arr3, extent=[10,2000, 270, 2000], vmin = 1, vmax = 1000) +axs[1,1].imshow(arr4, extent=[10,2000, 270, 2000], vmin = 1, vmax = 1000) +``` + + + + + + + + + + +![png](output_17_1.png) + + + + +```python +sgm_data.scans['2021-08-18t04-14-47-0600'].entry1.plot() +``` + +## Interpolating Data +Individual scans are loaded into the SGMData namespace, and can be interpolated from here. By selecting compute == False we can stage the dask array computation to occur at a later time (e.g. by running object.compute()). 
+ + +```python +df = sgm_data.scans['2021-08-18t04-14-47-0600'].entry1.interpolate(resolution=0.25) +``` + + +```python +sgm_data.scans['2021-01-21t13-47-04-0600'].entry2.keys() +``` + + + + + dict_keys(['command', 'sample', 'description', 'independent', 'signals', 'other', 'npartitions', 'new_axes', 'dataframe', 'binned']) + + + + +```python +sgm_data.scans['2021-01-21t13-47-04-0600'].entry2.binned['dataframe'] +``` + + + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
aux1clocki0pdsdd1-0sdd1-1sdd1-2sdd1-3sdd1-4sdd1-5...sdd4-249sdd4-250sdd4-251sdd4-252sdd4-253sdd4-254sdd4-255temp1temp2tey
en
440.0000000.00.02223972052.909091403.4545450.00.00.00.00.00.0...0.00.00.00.00.00.00.081.363636186.81818212242.818182
440.1001670.00.02210269012.800000415.8000000.00.00.00.00.00.0...0.00.00.00.00.00.00.072.000000181.20000012866.400000
440.2003340.00.02274773289.000000402.7142860.00.00.00.00.00.0...0.00.00.00.00.00.00.079.857143193.42857112280.000000
440.3005010.00.02229571447.500000411.0000000.00.00.00.00.00.0...0.00.00.00.00.00.00.081.333333187.16666712558.166667
440.4006680.00.02242971883.166667408.3333330.00.00.00.00.00.0...0.00.00.00.00.00.00.081.166667192.83333312588.000000
..................................................................
499.5993320.00.01996383576.600000390.2000000.00.00.00.00.00.0...0.00.00.00.00.00.00.079.800000179.80000036676.000000
499.6994990.00.02002185236.166667391.0000000.00.00.00.00.00.0...0.00.00.00.00.00.00.090.333333190.66666736227.333333
499.7996660.00.01995284377.800000400.6000000.00.00.00.00.00.0...0.00.00.00.00.00.00.079.600000190.20000036549.800000
499.8998330.00.01991382349.000000393.0000000.00.00.00.00.00.0...0.00.00.00.00.00.00.074.666667183.50000036853.833333
500.0000000.00.02183884041.473214392.3482140.00.00.00.00.00.0...0.00.00.00.00.00.00.080.339286188.36160736589.477679
+

600 rows × 1031 columns

+
+ + + + +```python +sgm_data.scans['2021-01-21t13-47-04-0600'].entry2.plot() +``` + + Plotting Interpolated Data + + + + + + + + + +
+ + + + + +### Batch interpolation +You can also batch interpolate the loaded scans from the top of the namespace. +Note: this process is only applicable if all scans loaded in the namespace can take the same interpolation parameters. + + +```python +%%time +interp_list = sgm_data.interpolate(resolution=0.1, start=450, stop=470) +``` + + + 0%| | 0/10 [00:00Averaged Data, from the list of 'like' scans using the plot() function. + + +```python +sgm_data.averaged['TiO2 - Ti'][0].plot() +``` + + + + + + + + +
+ + + + + + +```python +df =averaged['TiO2 - Ti'][0]['data'] +``` + + +```python +df.filter(regex="sdd2.*").to_numpy().shape +``` + + + + + (200, 256) + + + +## Fitting XRF Spectra +Using any data set for which an interpolation has already been performed, the fit_mcas function can be used to find peaks and batch fit them for all four sdd detectors. + + +```python +%%time +sgm_data = sgmdata.SGMQuery(sample="Focus Testing 2").data +sgm_data +``` + + + 0%| | 0/35 [00:00 + + File Entry Sample Command Independent Signals Other + + +2021-07-29t14-54-37-0600entry35 Focus Testing 2 ['cmesh', 'xp', '6.2', '5.7', '10', 'yp', '-0.8', '-1.1'] ['xp', 'yp'] ['aux1', 'clock', 'i0', 'pd', 'sdd1', 'sdd2', 'sdd3', 'sdd4', 'temp1', 'temp2', 'tey'] ['emission', 'image'] +2021-07-29t14-46-01-0600entry33 Focus Testing 2 ['cscan', 'hex_y', '-9.5576', '-8.5576'] ['hex_y'] ['aux1', 'clock', 'i0', 'pd', 'sdd1', 'sdd2', 'sdd3', 'sdd4', 'temp1', 'temp2', 'tey'] ['emission', 'image'] +2021-07-29t14-45-30-0600entry32 Focus Testing 2 ['cscan', 'hex_x', '-2.1949', '-1.1949'] ['hex_x'] ['aux1', 'clock', 'i0', 'pd', 'sdd1', 'sdd2', 'sdd3', 'sdd4', 'temp1', 'temp2', 'tey'] ['emission', 'image'] +2021-07-29t14-30-48-0600entry31 Focus Testing 2 ['cscan', 'hex_y', '-9.5576', '-8.5576'] ['hex_y'] ['aux1', 'clock', 'i0', 'pd', 'sdd1', 'sdd2', 'sdd3', 'sdd4', 'temp1', 'temp2', 'tey'] ['emission', 'image'] +2021-07-29t14-27-43-0600entry30 Focus Testing 2 ['cscan', 'hex_x', '-2.1949', '-1.1949'] ['hex_x'] ['aux1', 'clock', 'i0', 'pd', 'sdd1', 'sdd2', 'sdd3', 'sdd4', 'temp1', 'temp2', 'tey'] ['emission', 'image'] +2021-07-29t13-04-30-0600entry29 Focus Testing 2 ['cmesh', 'xp', '7', '5', '10', 'yp', '1', '-1'] ['xp', 'yp'] ['aux1', 'clock', 'i0', 'pd', 'sdd1', 'sdd2', 'sdd3', 'sdd4', 'temp1', 'temp2', 'tey'] ['emission', 'image'] +2021-07-29t13-01-16-0600entry28 Focus Testing 2 ['cmesh', 'xp', '7', '5', '10', 'yp', '1', '-1'] ['xp', 'yp'] ['aux1', 'clock', 'i0', 'pd', 'sdd1', 'sdd2', 'sdd3', 'sdd4', 'temp1', 
'temp2', 'tey'] ['emission', 'image'] +2021-07-29t12-56-50-0600entry27 Focus Testing 2 ['cscan', 'hex_x', '-2.1949', '-1.1949'] ['hex_x'] ['aux1', 'clock', 'i0', 'pd', 'sdd1', 'sdd2', 'sdd3', 'sdd4', 'temp1', 'temp2', 'tey'] ['emission', 'image'] +2021-07-29t12-53-38-0600entry26 Focus Testing 2 ['cscan', 'hex_x', '-2.1949', '-1.1949'] ['hex_x'] ['aux1', 'clock', 'i0', 'pd', 'sdd1', 'sdd2', 'sdd3', 'sdd4', 'temp1', 'temp2', 'tey'] ['emission', 'image'] +2021-07-29t12-51-54-0600entry25 Focus Testing 2 ['cscan', 'hex_x', '-2.1949', '-1.1949'] ['hex_x'] ['aux1', 'clock', 'i0', 'pd', 'sdd1', 'sdd2', 'sdd3', 'sdd4', 'temp1', 'temp2', 'tey'] ['emission', 'image'] +2021-07-29t12-48-04-0600entry24 Focus Testing 2 ['cscan', 'hex_x', '-2.1948', '-1.1948'] ['hex_x'] ['aux1', 'clock', 'i0', 'pd', 'sdd1', 'sdd2', 'sdd3', 'sdd4', 'temp1', 'temp2', 'tey'] ['emission', 'image'] +2021-07-29t12-47-22-0600entry23 Focus Testing 2 ['cscan', 'hex_x', '-2.1949', '-1.1949'] ['hex_x'] ['aux1', 'clock', 'i0', 'pd', 'sdd1', 'sdd2', 'sdd3', 'sdd4', 'temp1', 'temp2', 'tey'] ['emission', 'image'] +2021-07-29t12-46-42-0600entry22 Focus Testing 2 ['cscan', 'hex_x', '-2.1948', '-1.1948'] ['hex_x'] ['aux1', 'clock', 'i0', 'pd', 'sdd1', 'sdd2', 'sdd3', 'sdd4', 'temp1', 'temp2', 'tey'] ['emission', 'image'] +2021-07-29t12-38-23-0600entry21 Focus Testing 2 ['cscan', 'hex_x', '-2.1949', '-1.1949'] ['hex_x'] ['aux1', 'clock', 'i0', 'pd', 'sdd1', 'sdd2', 'sdd3', 'sdd4', 'temp1', 'temp2', 'tey'] ['emission', 'image'] +2021-07-29t12-37-50-0600entry20 Focus Testing 2 ['cscan', 'hex_y', '-9.5576', '-8.5576'] ['hex_y'] ['aux1', 'clock', 'i0', 'pd', 'sdd1', 'sdd2', 'sdd3', 'sdd4', 'temp1', 'temp2', 'tey'] ['emission', 'image'] +2021-07-29t12-37-16-0600entry19 Focus Testing 2 ['cscan', 'hex_y', '-9.5576', '-8.5576'] ['hex_y'] ['aux1', 'clock', 'i0', 'pd', 'sdd1', 'sdd2', 'sdd3', 'sdd4', 'temp1', 'temp2', 'tey'] ['emission', 'image'] +2021-07-29t12-36-41-0600entry18 Focus Testing 2 ['cscan', 'hex_y', '-9.5576', 
'-8.5576'] ['hex_y'] ['aux1', 'clock', 'i0', 'pd', 'sdd1', 'sdd2', 'sdd3', 'sdd4', 'temp1', 'temp2', 'tey'] ['emission', 'image'] +2021-07-29t12-35-32-0600entry17 Focus Testing 2 ['cscan', 'hex_y', '-9.5576', '-8.5576'] ['hex_y'] ['aux1', 'clock', 'i0', 'pd', 'sdd1', 'sdd2', 'sdd3', 'sdd4', 'temp1', 'temp2', 'tey'] ['emission', 'image'] +2021-07-29t12-34-50-0600entry16 Focus Testing 2 ['cscan', 'hex_y', '-9.5576', '-8.5576'] ['hex_y'] ['aux1', 'clock', 'i0', 'pd', 'sdd1', 'sdd2', 'sdd3', 'sdd4', 'temp1', 'temp2', 'tey'] ['emission', 'image'] +2021-07-29t12-34-12-0600entry15 Focus Testing 2 ['cscan', 'hex_y', '-9.5576', '-8.5576'] ['hex_y'] ['aux1', 'clock', 'i0', 'pd', 'sdd1', 'sdd2', 'sdd3', 'sdd4', 'temp1', 'temp2', 'tey'] ['emission', 'image'] +2021-07-29t12-33-26-0600entry14 Focus Testing 2 ['cscan', 'hex_y', '-9.5576', '-8.5576'] ['hex_y'] ['aux1', 'clock', 'i0', 'pd', 'sdd1', 'sdd2', 'sdd3', 'sdd4', 'temp1', 'temp2', 'tey'] ['emission', 'image'] +2021-07-29t12-32-46-0600entry13 Focus Testing 2 ['cscan', 'hex_y', '-9.5576', '-8.5576'] ['hex_y'] ['aux1', 'clock', 'i0', 'pd', 'sdd1', 'sdd2', 'sdd3', 'sdd4', 'temp1', 'temp2', 'tey'] ['emission', 'image'] +2021-07-29t12-01-05-0600entry12 Focus Testing 2 ['cscan', 'hex_y', '-9.5576', '-8.5576'] ['hex_y'] ['aux1', 'clock', 'i0', 'pd', 'sdd1', 'sdd2', 'sdd3', 'sdd4', 'temp1', 'temp2', 'tey'] ['emission', 'image'] +2021-07-29t12-00-01-0600entry11 Focus Testing 2 ['cscan', 'hex_y', '-9.5574', '-8.5574'] ['hex_y'] ['aux1', 'clock', 'i0', 'pd', 'sdd1', 'sdd2', 'sdd3', 'sdd4', 'temp1', 'temp2', 'tey'] ['emission', 'image'] +2021-07-29t11-55-41-0600entry10 Focus Testing 2 ['cscan', 'hex_x', '-2.1948', '-1.1948'] ['hex_x'] ['aux1', 'clock', 'i0', 'pd', 'sdd1', 'sdd2', 'sdd3', 'sdd4', 'temp1', 'temp2', 'tey'] ['emission', 'image'] +2021-07-29t11-50-14-0600entry9 Focus Testing 2 ['cscan', 'hex_x', '-2.1949', '-1.1949'] ['hex_x'] ['aux1', 'clock', 'i0', 'pd', 'sdd1', 'sdd2', 'sdd3', 'sdd4', 'temp1', 'temp2', 'tey'] 
['emission', 'image'] +2021-07-29t11-40-01-0600entry8 Focus Testing 2 ['cscan', 'hex_x', '-2.1948', '-1.1948'] ['hex_x'] ['aux1', 'clock', 'i0', 'pd', 'sdd1', 'sdd2', 'sdd3', 'sdd4', 'temp1', 'temp2', 'tey'] ['emission', 'image'] +2021-07-29t11-31-42-0600entry7 Focus Testing 2 ['cscan', 'hex_x', '-2.2467', '-1.2467'] ['hex_x'] ['aux1', 'clock', 'i0', 'pd', 'sdd1', 'sdd2', 'sdd3', 'sdd4', 'tey'] ['emission', 'image'] +2021-07-29t11-28-15-0600entry6 Focus Testing 2 ['cscan', 'hex_x', '-2.0383', '0'] ['hex_x'] ['aux1', 'clock', 'i0', 'pd', 'sdd1', 'sdd2', 'sdd3', 'sdd4', 'tey'] ['emission', 'image'] +2021-07-29t11-21-34-0600entry4 Focus Testing 2 ['cscan', 'hex_y', '-9.75', '-8'] ['hex_y'] ['aux1', 'clock', 'i0', 'pd', 'sdd1', 'sdd2', 'sdd3', 'sdd4', 'tey'] ['emission', 'image'] +2021-07-29t11-18-07-0600entry3 Focus Testing 2 ['cscan', 'hex_x', '-2.24', '0'] ['hex_x'] ['aux1', 'clock', 'i0', 'pd', 'sdd1', 'sdd2', 'sdd3', 'sdd4', 'tey'] ['emission', 'image'] +2021-07-29t11-06-02-0600entry2 Focus Testing 2 ['cscan', 'yp', '2', '-2'] ['yp'] ['aux1', 'clock', 'i0', 'pd', 'sdd1', 'sdd2', 'sdd3', 'sdd4', 'tey'] ['emission', 'image'] +2021-07-29t11-00-29-0600entry1 Focus Testing 2 ['cmesh', 'hex_x', '-1', '-2', '5', 'hex_y', '0', '-1'] ['hex_x', 'hex_y'] ['aux1', 'clock', 'i0', 'pd', 'sdd1', 'sdd2', 'sdd3', 'sdd4', 'tey'] ['emission', 'image'] + + + + + +```python +xrange = (7.0, 5.0) +yrange = (-1.0, 1.0) +dx = abs(xrange[0] - xrange[1])/(int(10)* 20) +dy = abs(yrange[0] - yrange[1])/50 + +sgm_data.scans['2021-07-29t13-04-30-0600'].entry29.interpolate(resolution=[dx, dy], start=[min(xrange),min(yrange)], stop=[max(xrange), max(yrange)]) +``` + + /opt/conda/lib/python3.8/site-packages/sgmdata/load.py:142: UserWarning: Resolution setting can't be higher than experimental resolution, setting resolution for axis 0 to 0.011050 + warnings.warn( + + + + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
aux1clocki0pdsdd1-0sdd1-1sdd1-2sdd1-3sdd1-4sdd1-5...sdd4-249sdd4-250sdd4-251sdd4-252sdd4-253sdd4-254sdd4-255temp1temp2tey
xpyp
5.0-1.0000000.00.03807393013.0000001085.0000000.00.0000000.0000000.000.0000000.000000...0.0000000.0000000.0000000.0000000.0000000.0000000.0069.333333227.000000108924.666667
-0.9591840.00.04267293184.2500001095.5000000.00.2500000.5000000.751.0000001.250000...62.25000062.50000062.75000063.00000063.25000063.50000063.7563.250000247.50000059221.000000
-0.9183670.00.04166193632.2500001091.5000000.00.2500000.5000000.751.0000001.250000...68.07927762.50000062.75000068.82927763.25000063.50000063.7565.250000240.25000061433.250000
-0.8775510.00.04152892952.2500001094.7500000.00.2500000.5000000.751.0000001.250000...67.76339762.50000062.75000063.00000063.25000063.50000063.7571.500000258.00000070244.250000
-0.8367350.00.03928392714.0000001094.0000000.00.2500000.5000000.751.0000001.250000...68.60759062.50000062.75000063.00000063.25000063.50000063.7568.250000248.25000065700.000000
.....................................................................
7.00.8367350.00.04566693865.3333331093.3333330.00.3333330.6666671.001.3333331.666667...83.00000083.33333383.66666784.00000084.33333384.66666785.0065.333333240.66666710716.333333
0.8775510.00.05129594221.0000001089.3333330.00.3333330.6666671.001.3333331.666667...83.00000083.33333383.66666784.00000084.33333384.66666785.0078.333333242.66666710584.666667
0.9183670.00.04510893837.7500001097.0000000.00.2500000.5000000.751.0000001.250000...62.25000062.50000062.75000063.00000063.25000063.50000063.7566.500000221.75000010620.000000
0.9591840.00.04413394547.7500001093.5000000.00.2500000.5000000.751.0000001.250000...62.25000062.50000062.75000063.00000063.25000063.50000063.7573.750000231.75000010388.500000
1.0000000.00.05346891688.5000001122.0000000.00.5000001.0000001.502.0000002.500000...124.500000125.000000125.500000126.000000126.500000127.000000127.5074.500000225.00000010168.500000
+

9050 rows × 1031 columns

+
+ + + + +```python +sgm_data.scans['2021-07-29t13-04-30-0600'].entry29.plot() +``` + + +```python +sgm_data.scans['2021-07-29t13-04-30-0600'].entry29.fit_mcas() +``` + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
aux1clocki0pdtemp1temp2teysdd1-10sdd1-26sdd1-52...sdd3-10sdd3-26sdd3-52sdd3-74sdd3-93sdd4-10sdd4-26sdd4-52sdd4-74sdd4-93
xpyp
5.0-1.0000000.00.03807393013.0000001085.00000069.333333227.000000108924.66666765.767553513.2269562944.373778...17.183095506.3023593029.581220188.478364220.28489273.284874653.0235003282.760711238.172125216.991867
-0.9591840.00.04267293184.2500001095.50000063.250000247.50000059221.000000122.8181682962.6435387829.947016...17.408436718.0450581602.94970385.13476891.051312210.3033902874.4930437783.3064412359.8200341645.555089
-0.9183670.00.04166193632.2500001091.50000065.250000240.25000061433.250000132.9857012746.7051997648.466225...26.9200564034.0518228637.4668831736.1559821000.515494206.6621322769.2634295904.357746860.556876456.507695
-0.8775510.00.04152892952.2500001094.75000071.500000258.00000070244.250000138.2851302713.2129877538.945823...31.6131713929.9660828684.7759782263.7534391358.506114204.3229403305.9342088029.1692392198.1707061348.027300
-0.8367350.00.03928392714.0000001094.00000068.250000248.25000065700.00000086.7750261826.8515693578.088948...34.9646174216.1823188614.2529242108.5936741218.672299208.3748012893.1936657995.2459752289.3659651519.073097
.....................................................................
7.00.8367350.00.04566693865.3333331093.33333365.333333240.66666710716.33333334.38344911.40344040.518849...11.30185914.30098845.39470835.47906449.8769096.11968915.86876638.18444035.36937347.119717
0.8775510.00.05129594221.0000001089.33333378.333333242.66666710584.66666716.52489714.86781442.868999...7.73675417.19984150.76095834.96818348.2941155.60312913.65995048.01468735.88285848.355163
0.9183670.00.04510893837.7500001097.00000066.500000221.75000010620.00000029.11029512.46963033.198436...8.55392910.59084247.28211127.05681038.8686085.21360410.27040841.76250126.91116544.708442
0.9591840.00.04413394547.7500001093.50000073.750000231.75000010388.50000026.69859311.58907134.177772...6.47852711.11341346.55823926.34835437.8402996.76480213.48736339.17271526.59664335.872150
1.0000000.00.05346891688.5000001122.00000074.500000225.00000010168.50000013.97767326.29679249.423505...7.10049023.12232061.54057752.23582769.0800139.63824520.90920560.50287752.43687767.438887
+

9050 rows × 27 columns

+
+ + + + +```python +sgm_data.scans['2021-07-29t13-04-30-0600'].entry29.plot() +``` + + + + + + + + +
+ + + + + + +```python +import numpy as np +df = sgm_data.scans['2021-07-29t13-04-30-0600'].entry29['binned']['dataframe'] +v = df.filter(regex=("sdd1.*"), axis=1).to_numpy() +sdd1 = np.reshape(v,(len(df.index.levels[0]), len(df.index.levels[1]), v.shape[-1])) + +plt.imshow(np.sum(sdd1[:,:,45:55], axis=2).T) +``` + + + + + + + + + + +![png](output_38_1.png) + + + +## Utilities +Aside from the core use cases, there are some useful utilities for exploring the HDF5 files. + + +```python +import h5py +from sgmdata.utilities import h5tree, scan_health +from sgmdata import preprocess +``` + + +```python +scan_health? +``` + + +```python +preprocess? +``` + + +```python +preprocess(sample="Test Sample", resolution=0.05) +``` + + +```python +h5tree? +``` + + +```python +f = h5py.File("/home/jovyan/data/arthurz/2020-01-07t12-56-39-0600.nxs", 'r') +h5tree(f) +``` + + ->entry1 attrs:{NX_class:NXentry,} + |-command type:object shape:() attrs:{} + ->data attrs:{NX_class:NXdata,axes:['yp' 'xp'],signal:sdd3,} + |-clock type:float32 shape:(3156,) attrs:{units:s,} + |-emission type:int32 shape:(256,) attrs:{units:eV,} + |-i0 type:float32 shape:(3156,) attrs:{gain:1 mA/V,units:a.u.,} + |-pd type:float32 shape:(3156,) attrs:{gain:5 uA/V,units:a.u.,} + |-sdd1 type:float64 shape:(3156, 256) attrs:{NX_class:NXdetector,} + |-sdd2 type:float64 shape:(3156, 256) attrs:{NX_class:NXdetector,} + |-sdd3 type:float64 shape:(3156, 256) attrs:{NX_class:NXdetector,} + |-sdd4 type:float64 shape:(3156, 256) attrs:{NX_class:NXdetector,} + |-temp1 type:float32 shape:(3156,) attrs:{NX_class:NX_TEMPERATURE,conversion:T = 5.648 + |-temp2 type:float32 shape:(3156,) attrs:{NX_class:NX_TEMPERATURE,conversion:T = 5.648 + |-tey type:float32 shape:(3156,) attrs:{gain:1 mA/V,units:a.u.,} + |-wavelength type:float64 shape:(1044,) attrs:{units:nm,} + |-xeol type:float64 shape:(3156, 1044) attrs:{NX_class:NXdetector,} + |-xp type:float32 shape:(3156,) attrs:{units:mm,} + |-yp type:float32 shape:(3156,) 
attrs:{units:mm,} + |-defintion type:object shape:() attrs:{} + ->instrument attrs:{NX_class:NXinstrument,} + ->absorbed_beam attrs:{NX_class:NXdetector,} + ->fluorescence attrs:{NX_class:NXfluorescence,} + ->incoming_beam attrs:{NX_class:NXdetector,} + ->luminescence attrs:{NX_class:NXfluorescence,} + ->mirror attrs:{NX_class:NXmirror,} + |-kbhb_d type:object shape:() attrs:{} + |-kbhb_u type:object shape:() attrs:{} + |-kblh type:object shape:() attrs:{} + |-kblv type:object shape:() attrs:{} + |-kbvb_d type:object shape:() attrs:{} + |-kbvb_u type:object shape:() attrs:{} + |-stripe type:object shape:() attrs:{} + ->monochromator attrs:{NX_class:NXmonochromator,} + |-en type:object shape:() attrs:{units:eV,} + |-en_err type:int64 shape:() attrs:{units:E/dE,} + ->exit_slit attrs:{NX_class:NXslit,} + |-exit_slit type:float32 shape:() attrs:{units:μm,} + |-exs type:object shape:() attrs:{units:mm,} + ->grating attrs:{NX_class:NXgrating,} + |-coating_material type:object shape:() attrs:{} + |-coating_roughness type:int64 shape:() attrs:{units:rms(Å),} + |-coating_thickness type:int64 shape:() attrs:{units:Å,} + |-deflection_angle type:int64 shape:() attrs:{units:degrees,} + |-interior_atmosphere type:object shape:() attrs:{} + |-period type:object shape:() attrs:{} + |-sgm type:object shape:() attrs:{units:mm,} + |-shape type:object shape:() attrs:{} + |-substrate_material type:object shape:() attrs:{} + ->source attrs:{NX_class:NXsource,} + |-current type:float32 shape:() attrs:{units:mA,} + |-name type:object shape:() attrs:{} + |-probe type:object shape:() attrs:{} + |-sr_energy type:float64 shape:() attrs:{units:GeV,} + |-top_up type:object shape:() attrs:{} + |-type type:object shape:() attrs:{} + |-und_gap type:object shape:() attrs:{units:mm,} + ->monitor attrs:{NX_class:NXmonitor,} + |-proposal type:object shape:() attrs:{} + ->sample attrs:{NX_class:NXsample,} + |-description type:object shape:() attrs:{} + |-image type:uint8 shape:(1200, 1600, 3) 
attrs:{CLASS:IMAGE,IMAGE_SUBCLASS:IMAGE_TRUEC + |-name type:object shape:() attrs:{} + ->positioner attrs:{NX_class:NXpositioner,} + |-hex_x type:object shape:() attrs:{units:mm,} + |-hex_y type:object shape:() attrs:{units:mm,} + |-hex_z type:object shape:() attrs:{units:mm,} + |-zp type:object shape:() attrs:{units:mm,} + ->potentiostat attrs:{NX_class:NXvoltage,} + ->temperature attrs:{NX_class:NXtemperature,} + |-start_time type:object shape:() attrs:{} + |-user type:object shape:() attrs:{} + + +```python + +``` diff --git a/docs/GettingStarted.md b/docs/GettingStarted.md new file mode 100644 index 0000000..3423136 --- /dev/null +++ b/docs/GettingStarted.md @@ -0,0 +1,36 @@ +# Getting Started +### Installation: +Using pip: +```commandline +pip install sgm-data +``` +or from source: +```commandline +git clone https://github.lightsource.ca/arthurz/sgmdata ./sgmdata +cd sgmdata +python setup.py install +``` +### Local Usage: +First import the package, and select data to load in. +```python +import sgmdata +data = sgmdata.SGMData(["file1.hdf5", "file2.hdf5", "..."]) +``` +This will identify the independent axis, signals and other data within the files listed. + +Useful functions: +```python +data.scans #contains a dictionary of the identified data arrays loaded from your file list +data.interpolate(start=270, stop=2000, resolution=0.5) #bin the data in scans dictionary and interpolates missing points +data.mean() #average data with the same sample name, and spec command together. +``` +Working with individual scans: +```python +df = data.scans['FilePrefix'].entry1.interpolate(start=270, stop=2000, resolution=0.1) #bin data for a single scan. 
+df2 = data.scans['FilePrefix'].entry1.fit_mcas() #perform batch gaussian fitting of interpolated SDD signal +``` +Plotting (with [Bokeh](https://docs.bokeh.org/en/latest/index.html)): +```python +data.scans['FilePrefix'].entry1.plot() +data.averaged['SampleName'].plot() +``` \ No newline at end of file diff --git a/docs/Load.md b/docs/Load.md new file mode 100644 index 0000000..b3653cd --- /dev/null +++ b/docs/Load.md @@ -0,0 +1,182 @@ + +# API +## SGMData(object) +----- + +### Description: +Class for loading in data from h5py or h5pyd files for raw SGM data. +To instantiate the class, pass a single (or list of) system file paths +(or hsds path). e.g. data = SGMData('/path/to/my/file.nxs') or SGMData(['1.h5', '2.h5']). +The data is auto grouped into three classifications: "independent", "signals", and "other". +You can view the data dictionary representation in a Jupyter cell by just invoking the SGMData() object. + +### Args: +>**file_paths** *(str or list)* List of file names to be loaded in by the data module. + +### Keywords: +>**npartitions** *(type: integer)* -- choose how many divisions (threads) +to split the file data arrays into. + +>**scheduler** *(type: str)* -- use specific dask cluster for operations, e.g. 'dscheduler:8786' + +>**axes** *(type: list(str))* -- names of the axes to use as independent axis and ignore +spec command issued + +>**threads** *(type: int)* -- set the number of threads in threadpool used to load in data. + +>**scan_type** *(type: str)* -- used to filter the type of scan loaded, e.g. 'cmesh'. + +>**shift** *(type: float)* -- default 0.5. Shifting 'x' axis data on consecutive passes of stage +for cmesh scans. + +### Functions: +>**interpolate()** -- batch operation on all scans in SGMData, takes in same parameters as interpolate(), +see interpolate() documentation. 
+ +>**mean()** -- averages all interpolated data together (organized by sample, scan type & range), returns list, saves data +under a dictionary in SGMData().averaged + + +### Attributes +>**scans** *(SGMScan)* By default the query will create an SGMData object containing your data, this can be turned off with the data keyword. + +>**averaged** *(list)*. Contains the averaged data from all interpolated datasets contained in the scan. + +## SGMScan(object) +----- + +### Description: +>Data class for storing dask arrays for SGM data files that have been grouped into 'NXentry', +and then divided into signals, independent axes, and other data. Contains convenience classes +for interpolation. + +### Functions: +>**interpolate()** -- for each scan entry in self.items() there is a SGMScan.entry.interpolate() function, +see interpolate() documentation. + +>**plot()** -- for each scan entry in self.items() there exists a SGMScan.entry.plot() method for displaying the +contained data with bokeh. + +>**fit_mcas()** -- for each scan entry in self.items() there exists a SGMScan.entry.fit_mcas() method for gaussian +peak fitting of the interpolated mca data. Returns resulting dataframe. + +>**get_arr()** -- for each scan entry in self.items() there exists a SGMScan.entry.get_arr() which will return a numpy array +from a stored interpolated dataframe by using a keyword filter: +```python +from sgmdata import SGMData + +data = SGMData('file.nxs') +data.interpolate() +sdd1 = data.get_arr('sdd1') +sdd1.shape # (1290, 256) +``` + +## SGMQuery(object) +----- + +### Description: +>You can find your data in the SGMLive database by using the SGMQuery module (when using the [SGM JupyterHub]( +https://sgm-hub.lightsource.ca) ). The following documentation details the keywords that you can use to customize your +search. 
+ +### Keywords: +>**sample** *(str:required)* -- At minimum you'll need to provide the keyword "sample", corresponding to the sample +name in the database as a default this will grab all the data under that sample +name. + +>**daterange** *(tuple:optional)* -- This can be used to sort through sample data by the day that it was +acquired. This is designed to take a tuple of the form ("start-date", +"end-date") where the strings are of the form "YYYY-MM-DD". You can also +just use a single string of the same form, instead of a tuple, this will +make the assumption that "end-date" == now(). + +>**data** *(bool:optional)* -- As a default (True) the SGMQuery object will try to load the data from disk, +if this is not the desired behaviour set data=False. + +>**user** *(str:optional:staffonly)* -- Can be used to select the username in SGMLive from which the sample query is +performed. Not available to non-staff. + +>**processed** *(bool:optional)* -- Can be used to return the paths for the processed data (already interpolated) instead +of the raw. You would generally set data = False for this option. + +### Attributes: +>**data** *(object)* -- By default the query will create an SGMData object containing your data, this can be turned off +with the data keyword. + +>**paths** *(list)* -- Contains the local paths to your data (or processed_data if processed=True). + +### Example Usage: +```python +from sgmdata import SGMQuery + +sgmq = SGMQuery(sample="TiO2 - C", processed=True) +data = sgmq.data +data.averaged['TiO2 - C'].plot() +``` + +## ReportBuilder(object) +----- + +### Description +>LaTeX document builder for SGMData mail-in program. Requires connection to CLS internal confluence site, and +assembles documents from the experimental logs saved therein. 
+ +### Args +> **proposal** *(str)* -- Project proposal number (in the title of the confluence page) + +> **principal** *(str)* -- The last name of the PI for the project, included in the title of the confluence page. + +> **cycle** *(int)* -- The cycle for which the report data was collected. + +> **session** *(int)* -- The experiment number from SGMLive + +> **shifts** *(int)* -- The number of shifts used to collect this data (information can be found in SGMLive +usage data) + +### Functions +> **create_sample_report(plots=True, key=None, process=True)** -- If initialization of object has +gone smoothly, you can create the sample report. + +>> kwargs: plots - create plots; key - specific sample holder, e.g. 'Holder A - a8asdk5'; process - interpolate +and average data not already processed in user account. For additional kwargs, see preprocess [documentation]( +/Utilities.html#preprocess). + +## interpolate(): +----- + +### Description: +>Creates the bins required for each independent axes to be histogrammed into for interpolation, +then uses dask dataframe groupby commands to perform a linear interpolation. + +### Args: +>**independent** *(dict)* -- Dictionary of independent axes from SGMScan.entry + +>**signals** *(dict)* -- Dictionary of signals from SGMScan.entry + +### Keywords: +>**start** *(list or number)* -- starting position of the new array + +>**stop** *(list or number)* -- ending position of the new array + +>**bins** *(list of numbers or arrays)* -- this can be an array of bin values for each axes, +or can be the number of bins desired. + +>**resolution** *(list or number)* -- used instead of bins to define the bin to bin distance. + +>**sig_digits** *(int)* -- used to override the default uncertainty of the interpolation axis of 2 (e.g. 0.01) + +## fit_peaks(): +_____ + +### Description: +Method for fitting multiple interpolated SDD numpy arrays with a sum of gaussians. 
+ +### Args: +>**emission** *(ndarray)* -- labels for xrf bins + +>**sdd** *(list)* -- list of sdd detector signals filtered from dataframe. + +### Keywords: +>**bounds** *(list)* -- list of len 2, included start and stop bin of mcas to be fit. + + diff --git a/docs/Makefile b/docs/Makefile new file mode 100644 index 0000000..d4bb2cb --- /dev/null +++ b/docs/Makefile @@ -0,0 +1,20 @@ +# Minimal makefile for Sphinx documentation +# + +# You can set these variables from the command line, and also +# from the environment for the first two. +SPHINXOPTS ?= +SPHINXBUILD ?= sphinx-build +SOURCEDIR = . +BUILDDIR = _build + +# Put it first so that "make" without argument is like "make help". +help: + @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + +.PHONY: help Makefile + +# Catch-all target: route all unknown targets to Sphinx using the new +# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). +%: Makefile + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/docs/Utilities.md b/docs/Utilities.md new file mode 100644 index 0000000..5629334 --- /dev/null +++ b/docs/Utilities.md @@ -0,0 +1,154 @@ + +# Utilities +## h5tree +----- + +### Description: +>A function to output the data-tree from an hdf5 file object. + +### Args: +>**h5** *(h5py.File)* -- Any H5File object, from h5py. + +### Returns: +>**None** + +### Example Usage: +```python +from sgmdata.utilities import h5tree +import h5py + +f = h5py.File("Filename.nxs", 'r') +h5tree(f) +``` + +## plot1d +----- + +### Description: +>Convenience function for plotting a bokeh lineplot, assumes Bokeh is already loaded. + +### Args: +>**xarr** *(array-like)* -- Independent array-like object, or list of array-like objects. 
+ +>**yarr** *(array-like)* -- Dependent array-like object, or list of array-like objects, same shape as xarr + +>**title** *(str)* -- Plot title + +>**labels** *(list(str))* -- Legend labels for multiple objects, defaults to Curve0, Curve1, etc. + +### Returns: +>**None** + +## preprocess +----- + +### Description: +>Utility for automating the interpolation and averaging of a sample in the SGMLive website. + +### Args: +>**sample** *(str)*: The name of the sample in your account that you wish to preprocess. + +### Keywords: +>All of the below are optional. + +>**user** *(str)* -- name of user account to limit search to (for use by staff). + +>**resolution** *(float)* -- to be passed to interpolation function, this is histogram bin width. + +>**start** *(float)* -- start energy to be passed to interpolation function. + +>**stop** *(float)* -- stop energy to be passed to interpolation function. + +>**sdd_max** *(int)* -- threshold value to determine saturation in SDDs, to determine scan_health (default +is 105000). +>**bscan_thresh** *(tuple)* -- (continuous, dumped, and saturated) these are the threshold percentages from +scan_health that will label a scan as 'bad'. + +### Returns: +>(HTML) hyperlink for preprocessed data stored in SGMLive + +### Example Usage: +```python +from sgmdata import preprocess + +preprocess(sample="TiO2", user='regiert', resolution=0.1) +``` + +## badscans +----- + +### Description: +>Batch calculation of sgmdata.utilities.scan_health for list of interpolated dataframes. + +### Args: +>interp (list) : list of SGMScan binned dataframes. + +### Returns: +>List of indexes for bad scans in interp. + + +## scan_health +----- + +### Description: +>Function takes in an interpolated scan (a pandas DataFrame), and returns the overall health. + +### Args: +>**df** *(DataFrame)* -- pandas dataframe from SGMScan.binned. + +>**verbose** *(bool)* -- Explain the returned output in plain text. 
+ +>**sdd_max** *(int)* -- 105000 (default) - saturation value for total SDD counts/s + +### Returns: +>(tuple): (Discontinuity %, Beam-dump %, Saturation %) + +## create_csv +----- + +### Description: +>Make CSV file from sample(s) + +### Args: +>**sample** *(str or list(str))* -- Sample(s) name(s) from SGMLive that you want to process. + +### Keywords: +>**mcas** *(list(str))* -- list of detector names for which the ROI summation should take place. + +>**user** *(str)* -- SGMLive account name, defaults to current jupyterhub user. + +>**out** *(os.path / str)* -- System path to output directory for csv file(s) + +>**I0** *(pandas.DataFrame)* -- Dataframe including an incoming flux profile to be joined to the sample +dataframe and included in each CSV file. + +>**ROI** *(tuple)* -- Set the upper and lower bin number for the Region-of-Interest integration to be used in +reducing the dimensionality of energy MCA data. + +### Returns: +>**list(pd.DataFrame)** -- list of dataframes created. + +## OneList +----- + +### Description: +>List extension that will return the sole item of the list if len(list) == 1 + +### Usage: +```python +data = {"key":1} +l = OneList([data]) +assert l == data
print(l['key']) #prints 1 +l.append(2) +print(l[1]) #prints 2 +assert l == data #raises Error +``` + +## DisplayDict +----- + +### Description +>dict class extension that includes repr_html for key,value display in Jupyter. 
+ + diff --git a/docs/_build/html/_images/output_15_1.png b/docs/_build/html/_images/output_15_1.png new file mode 100644 index 0000000..dbf16b0 Binary files /dev/null and b/docs/_build/html/_images/output_15_1.png differ diff --git a/docs/_build/html/_images/output_16_1.png b/docs/_build/html/_images/output_16_1.png new file mode 100644 index 0000000..c0e5118 Binary files /dev/null and b/docs/_build/html/_images/output_16_1.png differ diff --git a/docs/_build/html/_images/output_17_1.png b/docs/_build/html/_images/output_17_1.png new file mode 100644 index 0000000..3e5ef18 Binary files /dev/null and b/docs/_build/html/_images/output_17_1.png differ diff --git a/docs/_build/html/_images/output_38_1.png b/docs/_build/html/_images/output_38_1.png new file mode 100644 index 0000000..e4725c5 Binary files /dev/null and b/docs/_build/html/_images/output_38_1.png differ diff --git a/docs/conf.py b/docs/conf.py new file mode 100644 index 0000000..eea5d07 --- /dev/null +++ b/docs/conf.py @@ -0,0 +1,70 @@ +# Configuration file for the Sphinx documentation builder. +# +# This file only contains a selection of the most common options. For a full +# list see the documentation: +# https://www.sphinx-doc.org/en/master/usage/configuration.html + +# -- Path setup -------------------------------------------------------------- + +# If extensions (or modules to document with autodoc) are in another directory, +# add these directories to sys.path here. If the directory is relative to the +# documentation root, use os.path.abspath to make it absolute, like shown here. +# +import myst_parser +import os +import sys +sys.path.insert(0, os.path.abspath('..')) + + +# -- Project information ----------------------------------------------------- + +project = 'sgm-data' +copyright = '2022, Canadian Light Source Inc.' 
+author = 'Zachary Arthur' + +# The full version, including alpha/beta/rc tags +release = '0.4.0b' + + +# -- General configuration --------------------------------------------------- + +# Add any Sphinx extension module names here, as strings. They can be +# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom +# ones. +extensions = ['myst_parser', + 'sphinx.ext.autodoc' +] + +# Add any paths that contain templates here, relative to this directory. +templates_path = ['_templates'] + +# List of patterns, relative to source directory, that match files and +# directories to ignore when looking for source files. +# This pattern also affects html_static_path and html_extra_path. +exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] + + +# -- Options for HTML output ------------------------------------------------- + +# The theme to use for HTML and HTML Help pages. See the documentation for +# a list of builtin themes. +# +html_theme = 'alabaster' + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". +html_static_path = ['_static'] + +def docstring(app, what, name, obj, options, lines): + try: + md = '\n'.join(lines) + ast = myst_parser.sphinx_parser.MystParser().parse(md) + rst = myst_parser.ReStructuredTextRenderer().render(ast) + lines.clear() + lines[:] += rst.splitlines() + except: + pass + +def setup(app): + app.connect('autodoc-process-docstring', docstring) \ No newline at end of file diff --git a/docs/index.rst b/docs/index.rst new file mode 100644 index 0000000..7d9bdcc --- /dev/null +++ b/docs/index.rst @@ -0,0 +1,22 @@ +.. sgm-data documentation master file, created by + sphinx-quickstart on Thu Mar 3 15:43:04 2022. + You can adapt this file completely to your liking, but it should at least + contain the root `toctree` directive. 
+ +Welcome to sgm-data's documentation! +==================================== + +.. toctree:: + :maxdepth: 2 + :caption: Contents: + + GettingStarted.md + Example Notebook sgm-data.md + Load.md + Utilities.md + +Indices and tables +================== + +* :ref:`genindex` +* :ref:`search` diff --git a/docs/make.bat b/docs/make.bat new file mode 100644 index 0000000..2119f51 --- /dev/null +++ b/docs/make.bat @@ -0,0 +1,35 @@ +@ECHO OFF + +pushd %~dp0 + +REM Command file for Sphinx documentation + +if "%SPHINXBUILD%" == "" ( + set SPHINXBUILD=sphinx-build +) +set SOURCEDIR=. +set BUILDDIR=_build + +if "%1" == "" goto help + +%SPHINXBUILD% >NUL 2>NUL +if errorlevel 9009 ( + echo. + echo.The 'sphinx-build' command was not found. Make sure you have Sphinx + echo.installed, then set the SPHINXBUILD environment variable to point + echo.to the full path of the 'sphinx-build' executable. Alternatively you + echo.may add the Sphinx directory to PATH. + echo. + echo.If you don't have Sphinx installed, grab it from + echo.http://sphinx-doc.org/ + exit /b 1 +) + +%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% +goto end + +:help +%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% + +:end +popd diff --git a/docs/output_15_1.png b/docs/output_15_1.png new file mode 100644 index 0000000..dbf16b0 Binary files /dev/null and b/docs/output_15_1.png differ diff --git a/docs/output_16_1.png b/docs/output_16_1.png new file mode 100644 index 0000000..c0e5118 Binary files /dev/null and b/docs/output_16_1.png differ diff --git a/docs/output_17_1.png b/docs/output_17_1.png new file mode 100644 index 0000000..3e5ef18 Binary files /dev/null and b/docs/output_17_1.png differ diff --git a/docs/output_38_1.png b/docs/output_38_1.png new file mode 100644 index 0000000..e4725c5 Binary files /dev/null and b/docs/output_38_1.png differ diff --git a/getdocstrings.py b/getdocstrings.py new file mode 100644 index 0000000..872e240 --- /dev/null +++ b/getdocstrings.py @@ -0,0 +1,14 
@@ +import sgmdata +from sgmdata import utilities + + + +with open('./docs/Utilities.md', 'w') as f: + doc = utilities.__doc__ + for lines in doc.split('\n'): + f.write(f"{lines.lstrip()}\n") + +with open('./docs/Load.md', 'w') as f: + doc = sgmdata.__doc__ + for lines in doc.split('\n'): + f.write(f"{lines.lstrip()}\n") \ No newline at end of file diff --git a/sgmdata/__init__.py b/sgmdata/__init__.py index ff68995..6cfca87 100644 --- a/sgmdata/__init__.py +++ b/sgmdata/__init__.py @@ -1,6 +1,31 @@ from sgmdata import * -from sgmdata.load import SGMData +from sgmdata.load import SGMData, SGMScan from sgmdata.search import SGMQuery +from sgmdata.report import ReportBuilder from sgmdata.utilities.util import badscans, preprocess +from sgmdata.interpolate import interpolate +from sgmdata.xrffit import fit_peaks from .version import __version__ + +__doc__ = f""" +# API +## SGMData +----- +{SGMData.__doc__} +## SGMScan +----- +{SGMScan.__doc__} +## SGMQuery +----- +{SGMQuery.__doc__} +## ReportBuilder +----- +{ReportBuilder.__doc__} +## interpolate() +----- +{interpolate.__doc__} +## fit_peaks() +_____ +{fit_peaks.__doc__} +""" diff --git a/sgmdata/interpolate.py b/sgmdata/interpolate.py index f253eed..76b4336 100644 --- a/sgmdata/interpolate.py +++ b/sgmdata/interpolate.py @@ -62,20 +62,25 @@ def shift_cmesh(x, shift=0.5): def interpolate(independent, signals, command=None, **kwargs): """ ### Description: - Creates the bins required for each independent axes to be histogrammed into for interpolation, - then uses dask dataframe groupby commands to perform a linear interpolation. + >Creates the bins required for each independent axes to be histogrammed into for interpolation, + then uses dask dataframe groupby commands to perform a linear interpolation. 
### Args: - >**independent** *(dict)* -- Dictionary of independent axes from SGMScan.entry - >**signals** *(dict)* -- Dictionary of signals from SGMScan.entry + >**independent** *(dict)* -- Dictionary of independent axes from SGMScan.entry + + >**signals** *(dict)* -- Dictionary of signals from SGMScan.entry ### Keywords: - >**start** *(list or number)* -- starting position of the new array - >**stop** *(list or number)* -- ending position of the new array - >**bins** *(list of numbers or arrays)* -- this can be an array of bin values for each axes, + >**start** *(list or number)* -- starting position of the new array + + >**stop** *(list or number)* -- ending position of the new array + + >**bins** *(list of numbers or arrays)* -- this can be an array of bin values for each axes, or can be the number of bins desired. - >**resolution** *(list or number)* -- used instead of bins to define the bin to bin distance. - >**sig_digits** *(int)* -- used to overide the default uncertainty of the interpolation axis of 2 (e.g. 0.01) + + >**resolution** *(list or number)* -- used instead of bins to define the bin to bin distance. + + >**sig_digits** *(int)* -- used to override the default uncertainty of the interpolation axis of 2 (e.g. 0.01) """ compute = kwargs.get('compute', True) method = kwargs.get('method', 'nearest') diff --git a/sgmdata/load.py b/sgmdata/load.py index f5e19d1..7d2d32e 100644 --- a/sgmdata/load.py +++ b/sgmdata/load.py @@ -37,10 +37,30 @@ class SGMScan(DisplayDict): """ ### Description: - ----- - Data class for storing dask arrays for SGM data files that have been grouped into 'NXentry', - and then divided into signals, independent axes, and other data. Contains convenience classes - for interpolation. + >Data class for storing dask arrays for SGM data files that have been grouped into 'NXentry', + and then divided into signals, independent axes, and other data. Contains convenience classes + for interpolation.
+ + ### Functions: + >**interpolate()** -- for each scan entry in self.items() there is a SGMScan.entry.interpolate() function, + see interpolate() documentation. + + >**plot()** -- for each scan entry in self.items() there exists a SGMScan.entry.plot() method for displaying the + contained data with bokeh. + + >**fit_mcas()** -- for each scan entry in self.items() there exists a SGMScan.entry.fit_mcas() method for gaussian + peak fitting of the interpolated mca data. Returns resulting dataframe. + + >**get_arr()** -- for each scan entry in self.items() there exists a SGMScan.entry.get_arr() which will return a numpy array + from an stored interpolated dataframe by using a keyword filter: + ```python + from sgmdata import SGMData + + data = SGMData('file.nxs') + data.interpolate() + sdd1 = data.get_arr('sdd1') + sdd1.shape # (1290, 256) + ``` """ class DataDict(DisplayDict): @@ -48,14 +68,11 @@ class DataDict(DisplayDict): def get_arr(self, detector): """ ### Description: - ----- - Function to return a numpy array from the internal pandas dataframe, for a given detector. + >Function to return a numpy array from the internal pandas dataframe, for a given detector. ### Args: - ----- - >**detector** *(str)* -- Name of detector. + >**detector** *(str)* -- Name of detector. ### Returns: - ----- - >**detector** *(ndarray)* + >**detector** *(ndarray)* """ if isinstance(detector, str): try: @@ -119,11 +136,9 @@ def fit_mcas(self, detectors=[], emission=[]): def read(self, filename=None): """ ### Description - ----- - Function to load in already processed data from file. + >Function to load in already processed data from file. ### Keywords - ----- - >**filename** *(str)* -- path to file on disk. + >**filename** *(str)* -- path to file on disk. """ if not filename: return [] @@ -168,11 +183,9 @@ def read(self, filename=None): def write(self, filename=None): """ ### Description: - ----- - Write data to NeXuS formatted data file. + >Write data to NeXuS formatted data file. 
### Keyword: - ----- - >**filename** *(str / os.path)* -- path/name of file for output. + >**filename** *(str / os.path)* -- path/name of file for output. """ if 'sdd3' in self['signals']: signal = u'sdd3' @@ -226,8 +239,7 @@ def write(self, filename=None): def plot(self, **kwargs): """ ### Description - ----- - Determines the appropriate plot based on independent axis number and name. + >Determines the appropriate plot based on independent axis number and name. """ dim = len(self.independent) if dim == 1 and 'en' in self.independent.keys(): @@ -418,38 +430,40 @@ def __getitem__(self, item): class SGMData(object): """ ### Description: - ----- Class for loading in data from h5py or h5pyd files for raw SGM data. To substantiate pass the class pass a single (or list of) system file paths - (or hsds path). e.g. data = SGMData('/path/to/my/file.nxs') or SGMData(['1.h5', '2.h5']) + (or hsds path). e.g. data = SGMData('/path/to/my/file.nxs') or SGMData(['1.h5', '2.h5']). The data is auto grouped into three classifications: "independent", "signals", and "other". You can view the data dictionary representation in a Jupyter cell by just invoking the SGMData() object. ### Args: - ----- >**file_paths** *(str or list)* List of file names to be loaded in by the data module. ### Keywords: - ----- >**npartitions** *(type: integer)* -- choose how many divisions (threads) to split the file data arrays into. + >**scheduler** *(type: str)* -- use specific dask cluster for operations, e.g. 'dscheduler:8786' + >**axes** *(type: list(str))* -- names of the axes to use as independent axis and ignore spec command issued + >**threads** *(type: int)* -- set the number of threads in threadpool used to load in data. + >**scan_type** *(type: str)* -- used to filter the type of scan loaded, e.g. 'cmesh', ' + >**shift** *(type: float)* -- default 0.5. Shifting 'x' axis data on consecutive passes of stage for cmesh scans. 
### Functions: - ----- - >**interpolate()** -- takes in same parameters as SGMScan.entry.interpolate() + >**interpolate()** -- batch operation on all scans in SGMData, takes in same parameters as interpolate(), + see interpolate() documentation. >**mean()** -- averages all interpolated data together (organized by sample, scan type & range), returns list, saves data under a dictionary in SGMData().averaged - Attributes - ----- + + ### Attributes >**scans** *(SGMScan)* By default the query will create an SGMData object containing your data, this can be turned off with the data keyword. >**averaged** *(list)*. Contains the averaged data from all interpolated datasets contained in the scan. @@ -786,7 +800,6 @@ def interpolate(self, **kwargs): entries.append(entry) with ThreadPool(self.threads) as pool: results = list(tqdm(pool.imap_unordered(_interpolate, entries), total=len(entries))) - results = [r for r in results if r is not None] self.interpolated = True return results diff --git a/sgmdata/report.py b/sgmdata/report.py index ceb3169..30f0b4e 100644 --- a/sgmdata/report.py +++ b/sgmdata/report.py @@ -20,22 +20,44 @@ class ReportBuilder(object): """ ### Description - ----- - LaTeX document builder for SGMData mail-in program. Requires connection to CLS internal confluence site, and - assembles documents from the experimental logs saved therein. + >LaTeX document builder for SGMData mail-in program. Requires connection to CLS internal confluence site, and + assembles documents from the experimental logs saved therein. ### Args - ----- - > **proposal** *(str)* -- Project proprosal number (in the title of the confluence page) - > **principal** *(str)* -- The last name of the PI for the project, included in the title of the confluence page. - > **cycle** *(int)* -- The cycle for which the report data was collected.
- > **session** *(int)* -- The experiment number from SGMLive - > **shifts** *(int)* -- The number of shifts used to collected this data (information can be found in SGMLive + > **proposal** *(str)* -- Project proposal number (in the title of the confluence page) + + > **principal** *(str)* -- The last name of the PI for the project, included in the title of the confluence page. + + > **cycle** *(int)* -- The cycle for which the report data was collected. + + > **session** *(int)* -- The experiment number from SGMLive + + > **shifts** *(int)* -- The number of shifts used to collect this data (information can be found in SGMLive usage data) ### Functions - > **create_sample_report(plots=True, key=None, process=True)** -- If initialization has gone smoothly, you can - create the sample report. + > **create_sample_report(plots=True, key=None, process=True)** -- If initialization of object has + gone smoothly, you can create the sample report. + + >> kwargs: plots - create plots; key - specific sample holder, e.g. 'Holder A - 5803b7d0'; process - interpolate + and average data not already processed in user account. For additional kwargs, see preprocess [documentation]( + /Utilities.html#preprocess). + + ### Attributes: + >**log** *(str)* -- Data collected from confluence API, can be useful for debugging. + + >**paths** *(list)* -- Contains the local paths to your data (or processed_data if processed=True).
+ + ### Example Usage: + ```python + from sgmdata import SGMQuery + + sgmq = SGMQuery(sample="TiO2 - C", processed=True) + data = sgmq.data + data.averaged['TiO2 - C'].plot() + ``` + + """ def __init__(self, proposal, principal, cycle, session, shifts, **kwargs): diff --git a/sgmdata/search.py b/sgmdata/search.py index 3ced85c..71a1cc2 100644 --- a/sgmdata/search.py +++ b/sgmdata/search.py @@ -34,38 +34,44 @@ class SGMQuery(object): """ ### Description: - ----- - You can find your data in the SGMLive database by using the SGMQuery module (when using the [SGM JupyterHub]( - https://sgm-hub.lightsource.ca) ). The following documentation details the keywords that you can use to customize your - search. + >You can find your data in the SGMLive database by using the SGMQuery module (when using the [SGM JupyterHub]( + https://sgm-hub.lightsource.ca) ). The following documentation details the keywords that you can use to customize your + search. ### Keywords: - ----- - >**sample** *(str:required)* -- At minimum you'll need to provide the keyword "sample", corresponding the sample - name in the database as a default this will grab all the data under that sample - name. + >**sample** *(str:required)* -- At minimum you'll need to provide the keyword "sample", corresponding the sample + name in the database as a default this will grab all the data under that sample + name. - >**daterange** *(tuple:optional)* -- This can be used to sort through sample data by the day that it was - acquired. This is designed to take a tuple of the form ("start-date", - "end-date") where the strings are of the form "YYYY-MM-DD". You can also - just use a single string of the same form, instead of a tuple, this will - make the assumption that "end-date" == now(). + >**daterange** *(tuple:optional)* -- This can be used to sort through sample data by the day that it was + acquired. 
This is designed to take a tuple of the form ("start-date", + "end-date") where the strings are of the form "YYYY-MM-DD". You can also + just use a single string of the same form, instead of a tuple, this will + make the assumption that "end-date" == now(). - >**data** *(bool:optional)* -- As a default (True) the SGMQuery object will try to load the the data from disk, - if this is not the desired behaviour set data=False. + >**data** *(bool:optional)* -- As a default (True) the SGMQuery object will try to load the the data from disk, + if this is not the desired behaviour set data=False. - >**user** *(str:optional:staffonly)* -- Can be used to select the username in SGMLive from which the sample query is - performed. Not available to non-staff. + >**user** *(str:optional:staffonly)* -- Can be used to select the username in SGMLive from which the sample query is + performed. Not available to non-staff. - >**processed** *(bool:optional)* -- Can be used to return the paths for the processed data (already interpolated) instead - of the raw. You would generally set data = False for this option. + >**processed** *(bool:optional)* -- Can be used to return the paths for the processed data (already interpolated) instead + of the raw. You would generally set data = False for this option. - ### Attributes - ----- - >**data** *(object)* -- By default the query will create an SGMData object containing your data, this can be turned off + ### Attributes: + >**data** *(object)* -- By default the query will create an SGMData object containing your data, this can be turned off with the data keyword. - >**paths** *(list)* -- Contains the local paths to your data (or processed_data if processed=True). + >**paths** *(list)* -- Contains the local paths to your data (or processed_data if processed=True). 
+ + ### Example Usage: + ```python + from sgmdata import SGMQuery + + sgmq = SGMQuery(sample="TiO2 - C", processed=True) + data = sgmq.data + data.averaged['TiO2 - C'].plot() + ``` """ def __init__(self, **kwargs): diff --git a/sgmdata/utilities/__init__.py b/sgmdata/utilities/__init__.py index 1a5e3fb..547619e 100644 --- a/sgmdata/utilities/__init__.py +++ b/sgmdata/utilities/__init__.py @@ -1,4 +1,32 @@ from .util import h5tree, scan_health, printTree, plot1d, preprocess, badscans, create_csv from .lib import scan_lib +from .magicclass import OneList, DisplayDict -__all__ = ['h5tree', 'scan_health', 'plot1d', 'scan_lib', 'preprocess', 'badscans', 'create_csv'] \ No newline at end of file +__all__ = ['h5tree', 'scan_health', 'plot1d', 'scan_lib', 'preprocess', 'badscans', 'create_csv'] +__doc__ = f""" +# Utilities +## h5tree +----- +{h5tree.__doc__} +## plot1d +----- +{plot1d.__doc__} +## preprocess +----- +{preprocess.__doc__} +## badscans +----- +{badscans.__doc__} +## scan_health +----- +{scan_health.__doc__} +## create_csv +----- +{create_csv.__doc__} +## OneList +----- +{OneList.__doc__} +## DisplayDict +----- +{DisplayDict.__doc__} +""" \ No newline at end of file diff --git a/sgmdata/utilities/magicclass.py b/sgmdata/utilities/magicclass.py index 4a6e502..a1eb424 100644 --- a/sgmdata/utilities/magicclass.py +++ b/sgmdata/utilities/magicclass.py @@ -2,7 +2,10 @@ class DisplayDict(OrderedDict): - + """ + ### Description + >dict class extension that includes repr_html for key,value display in Jupyter. + """ def __init__(self, *args, **kwargs): super(DisplayDict, self).__init__(*args, **kwargs) @@ -33,6 +36,7 @@ def _repr_html_(self): def _repr_console_(self): """ + ### Description Takes own data and organizes it into a console-friendly table. 
""" final_data = '' @@ -47,6 +51,21 @@ def update(self, *args, **kwargs): class OneList(list): + """ + ### Description: + >List extension that will return the sole item of the list if len(list) == 1 + + ### Usage: + ```python + data = {"key":1} + l = OneList([data]) + assert l == data + print(l['key']) #prints 1 + l.append(2) + print(l[1]) #prints 2 + assert l == data #raises Error + ``` + """ def __init__(self, iterable, **kwargs): self.l = list(iterable) for i in range(0, len(self.l)): diff --git a/sgmdata/utilities/util.py b/sgmdata/utilities/util.py index 33bba0c..903088c 100644 --- a/sgmdata/utilities/util.py +++ b/sgmdata/utilities/util.py @@ -55,17 +55,22 @@ def printTree(name, node): def h5tree(h5): """ ### Description: - ----- - A function to output the data-tree from an hdf5 file object. + >A function to output the data-tree from an hdf5 file object. ### Args: - ----- - >**h5** *(h5py.File)* -- Any H5File object, from h5py. + >**h5** *(h5py.File)* -- Any H5File object, from h5py. ### Returns: - ----- - >**None** + >**None** + ### Example Usage: + ```python + from sgmdata.utilities import h5tree + import h5py + + f = h5py.File("Filename.nxs", 'r') + h5tree(f) + ``` """ h5.visititems(printTree) @@ -75,17 +80,14 @@ def h5tree(h5): def get_moving_average(data, window_size=4): """ ### Description: - ----- A function to calculate the moving average of data using numpy's implementation of convolution ### Args: - ----- - > **data** *(numpy.ndarray)* -- A 1d numpy array of the data on which to calculate the moving average + > **data** *(numpy.ndarray)* -- A 1d numpy array of the data on which to calculate the moving average window_size (int): An integer value of the number of samples to consider when averaging ## Returns: - ----- - > **m_average** *(numpy.ndarray)* -- A 1d numpy array of the calculated moving average. The size of + > **m_average** *(numpy.ndarray)* -- A 1d numpy array of the calculated moving average. 
The size of "m_average" is the same as the size of input "data" """ @@ -106,21 +108,20 @@ def get_moving_slope(dep, indep, window_size=4): def test_abrupt_change(detector, sigma=10.0, tolerance=1000.0): """ ### Description: - ----- A function to detect the percentage of abrupt change in a single detector data ### Args: - ----- - >**detector** *(tuple)*: A python tuple in the form (detector_name, data). The detector_name is a + >**detector** *(tuple)*: A python tuple in the form (detector_name, data). The detector_name is a string, while data is a numpy array of the data to detect abrupt change - >**sigma** *(float)*: A float value for standard deviation. This number define how different a specific + + >**sigma** *(float)*: A float value for standard deviation. This number define how different a specific count should be from the standard deviation to be considered abrupt change - >**tolerance** *(float)*: A float value specifying the absolute tolerance parameter for detecting if + + >**tolerance** *(float)*: A float value specifying the absolute tolerance parameter for detecting if two numbers should be considered close to each other ### Returns: - ----- - >**str**: Percentage of the data that is normal count and the percentage the fuction think might be abrupt change + >**str**: Percentage of the data that is normal count and the percentage the fuction think might be abrupt change """ # Get the detector name and the actual data @@ -167,23 +168,23 @@ def test_abrupt_change(detector, sigma=10.0, tolerance=1000.0): def test_detector_count_rates(detector, scalar_range=(5000, 500000), sdds_range=(1000, 30000)): """ ### Description: - ----- - A function to detect if count rates of a specific detector are within a defined range + >A function to detect if count rates of a specific detector are within a defined range ### Args: - ----- - >**detector** *(tuple)*: A python tuple in the form (detector_name, data). 
The detector_name is a + >**detector** *(tuple)*: A python tuple in the form (detector_name, data). The detector_name is a string, while data is a numpy array of the data to detect the count rates - >**tey_range** *(tuple)*: A python tuple defining the normal count range for the tey detector + + >**tey_range** *(tuple)*: A python tuple defining the normal count range for the tey detector in the form (min_normal_count, max_normal_count) - >**io_range** *(tuple)* -- A python tuple defining the normal count range for the io detector + + >**io_range** *(tuple)* -- A python tuple defining the normal count range for the io detector in the form (min_normal_count, max_normal_count) - >**sdds_range** *(tuple)* -- A python tuple defining the normal count range for the sdd[1-4] + + >**sdds_range** *(tuple)* -- A python tuple defining the normal count range for the sdd[1-4] detectors in the form (min_normal_count, max_normal_count) ### Returns: - ----- - >**str** -- Percentage of the data that is within normal count range and the percentage + >**str** -- Percentage of the data that is within normal count range and the percentage that is outside the defined normal count range """ @@ -226,19 +227,17 @@ def test_detector_count_rates(detector, scalar_range=(5000, 500000), sdds_range= def test_beam_dump(detector, indep): """ ### Description: - ----- - A function to detect the percentage of beam dump in a single detector data + >A function to detect the percentage of beam dump in a single detector data ### Args: - ----- - >**detector** *(tuple)* -- A python tuple in the form (detector_name, data). + >**detector** *(tuple)* -- A python tuple in the form (detector_name, data). 
The detector_name is a string, while data is a numpy array of the data to detect beam dump - >**indep** *(numpy.ndarray)* -- A numpy array of the independent variable data + + >**indep** *(numpy.ndarray)* -- A numpy array of the independent variable data ### Returns: - ----- - >*str* -- Percentage of the data that is normal count and the percentage + >*str* -- Percentage of the data that is normal count and the percentage the fuction think is a beam dump """ @@ -354,17 +353,17 @@ def test_beam_dump(detector, indep): def scan_health(df, verbose=False, sdd_max=105000, length=None): """ ### Description: - ----- - Function takes in a interpolated scan (a pandas DataFrame), and returns the overall health. + >Function takes in a interpolated scan (a pandas DataFrame), and returns the overall health. ### Args: - ----- - >**df** *(DataFrame)* -- pandas dataframe from SGMScan.binned. - >**verbose** *(bool)* -- Explain the returned output in plain text. - >**sdd_max** *(int)* -- 105000 (default) - saturation value for total SDD counts/s + >**df** *(DataFrame)* -- pandas dataframe from SGMScan.binned. + + >**verbose** *(bool)* -- Explain the returned output in plain text. + >**sdd_max** *(int)* -- 105000 (default) - saturation value for total SDD counts/s - returns (tuple): (Discontiunty %, Beam-dump %, Saturation %) + ### Returns: + >(tuple): (Discontiunty %, Beam-dump %, Saturation %) """ EN = df.index.to_numpy() @@ -407,20 +406,20 @@ def scan_health(df, verbose=False, sdd_max=105000, length=None): def badscans(interp, **kwargs): """ ### Description: - Batch calculation of sgmdata.utilities.scan_health for list of interpolated dataframes. + >Batch calculation of sgmdata.utilities.scan_health for list of interpolated dataframes. ### Args: - interp (list) : list of SGMScan binned dataframes. + >interp (list) : list of SGMScan binned dataframes. ### Returns: - List of indexes for bad scans in interp. + >List of indexes for bad scans in interp. 
""" cont = kwargs.get('cont', 55) dump = kwargs.get('dump', 30) sat = kwargs.get('sat', 60) sdd_max = kwargs.get('sdd_max', 50000) - length = np.bincount([len(i) for i in interp]).argmax() + length = np.bincount([len(i) for i in interp if i is not None]).argmax() bad_scans = [] health = [scan_health(i, sdd_max=sdd_max, length=length) for i in interp] pbar = tqdm(health) @@ -435,32 +434,36 @@ def badscans(interp, **kwargs): def preprocess(sample, **kwargs): """ ### Description: - ----- - Utility for automating the interpolation and averaging of a sample in the SGMLive website. + >Utility for automating the interpolation and averaging of a sample in the SGMLive website. ### Args: - ----- - >**sample** *(str)*: The name of the sample in your account that you wish to preprocess. + >**sample** *(str)*: The name of the sample in your account that you wish to preprocess. ### Keywords: - ----- - All of the below are optional. - >**user** *(str)* -- name of user account to limit search to (for use by staff). + >All of the below are optional. - >**resolution** *(float)* -- to be passed to interpolation function, this is histogram bin width. + >**user** *(str)* -- name of user account to limit search to (for use by staff). - >**start** *(float)* -- start energy to be passed to interpolation function. + >**resolution** *(float)* -- to be passed to interpolation function, this is histogram bin width. - >**stop** *(float)* -- stop energy to be passed to interpolation function. + >**start** *(float)* -- start energy to be passed to interpolation function. - >**sdd_max** *(int)* -- threshold value to determine saturation in SDDs, to determine scan_health (default + >**stop** *(float)* -- stop energy to be passed to interpolation function. + + >**sdd_max** *(int)* -- threshold value to determine saturation in SDDs, to determine scan_health (default is 105000). 
- >**bscan_thresh** *(tuple)* -- (continuous, dumped, and saturated) these are the threshold percentages from + >**bscan_thresh** *(tuple)* -- (continuous, dumped, and saturated) these are the threshold percentages from scan_health that will label a scan as 'bad'. ### Returns: - ----- - (HTML) hyperlink for preprocessed data stored in SGMLive + >(HTML) hyperlink for preprocessed data stored in SGMLive + + ### Example Usage: + ```python + from sgmdata import preprocess + + preprocess(sample="TiO2", user='regiert', resolution=0.1) + ``` """ from sgmdata.search import SGMQuery from sgmdata.load import SGMData @@ -519,25 +522,26 @@ def sumROI(arr, start, stop): def create_csv(sample, mcas=None, **kwargs): """ ### Description: - ----- - Make CSV file from sample(s) + >Make CSV file from sample(s) + ### Args: - ----- - >**sample** *(str or list(str))* -- Sample(s) name(s) from SGMLive that you want to process. + >**sample** *(str or list(str))* -- Sample(s) name(s) from SGMLive that you want to process. ### Keywords: - ----- - >**mcas** *(list(str))* -- list of detector names for which the ROI summation should take place. - >**user** *(str)* -- SGMLive account name, defaults to current jupyterhub user. - >**out** *(os.path / str)* -- System path to output directory for csv file(s) - >**I0** *(pandas.DataFrame)** -- Dataframe including an incoming flux profile to be joined to the sample - dataframe and included in the each CSV file. - >**ROI** *(tuple)** -- Set the upper and lower bin number for the Region-of-Interest integration to be used in - reducing the dimensionality of energy MCA data. + >**mcas** *(list(str))* -- list of detector names for which the ROI summation should take place. + + >**user** *(str)* -- SGMLive account name, defaults to current jupyterhub user. 
+ + >**out** *(os.path / str)* -- System path to output directory for csv file(s) + + >**I0** *(pandas.DataFrame)** -- Dataframe including an incoming flux profile to be joined to the sample + dataframe and included in the each CSV file. + + >**ROI** *(tuple)** -- Set the upper and lower bin number for the Region-of-Interest integration to be used in + reducing the dimensionality of energy MCA data. ### Returns: - ----- - >**list(pd.DataFrame)** -- list of dataframes created. + >**list(pd.DataFrame)** -- list of dataframes created. """ from slugify import slugify from sgmdata.search import SGMQuery @@ -608,17 +612,19 @@ def create_csv(sample, mcas=None, **kwargs): def plot1d(xarr,yarr, title="Plot", labels=[]): """ ### Description: - ----- - Convenience function for plotting a bokeh lineplot, assumes Bokeh is already loaded. + >Convenience function for plotting a bokeh lineplot, assumes Bokeh is already loaded. ### Args: - ----- - >**xarr** *(array-like)* -- Independent array-like object, or list of array-like objects. - >**yarr** *(array-like)* -- Dependent array-like object, or list of array-like objects, same shape as xarr - >**title** *(str)* -- Plot title - >**labels** *(list(str))* -- Legend labels for multiple objects, defaults to Curve0, Curve1, etc. + >**xarr** *(array-like)* -- Independent array-like object, or list of array-like objects. + + >**yarr** *(array-like)* -- Dependent array-like object, or list of array-like objects, same shape as xarr + + >**title** *(str)* -- Plot title - returns None + >**labels** *(list(str))* -- Legend labels for multiple objects, defaults to Curve0, Curve1, etc. 
+ + ### Returns: + >**None** """ TOOLS = 'pan, hover,box_zoom,box_select,crosshair,reset,save' diff --git a/sgmdata/xrffit.py b/sgmdata/xrffit.py index 9e21939..4313afd 100644 --- a/sgmdata/xrffit.py +++ b/sgmdata/xrffit.py @@ -74,6 +74,18 @@ def fit_amp(args): def fit_peaks(emission, sdd, bounds=[]): + """ + ### Description: + Method for fitting multiple interpolated SDD numpy arrays with a sum of gaussians. + + ### Args: + >**emission** *(ndarray)* -- labels for xrf bins + + >**sdd** *(list)* -- list of sdd detector signals filtered from dataframe. + + ### Keywords: + >**bounds** *(list)* -- list of len 2, included start and stop bin of mcas to be fit. + """ if not isinstance(sdd, list): sdd = [sdd] names = [list(s)[0].split('-')[0] for s in sdd]