Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

PFB inversion added as a reader class for ARO data #43

Open
wants to merge 3 commits into
base: fft-trials
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 7 additions & 7 deletions scintellometry/folding/fold.py
Original file line number Diff line number Diff line change
Expand Up @@ -249,13 +249,6 @@ def fold(fh, comm, samplerate, fedge, fedge_at_top, nchan,
assert raw.dtype.kind == 'c'
vals = raw

if fedge_at_top:
# take complex conjugate to ensure by-channel de-dispersion is
# applied correctly.
# This needs to be done for ARO data, since we are in 2nd Nyquist
# zone; not clear it is needed for other telescopes.
np.conj(vals, out=vals)

# For pre-channelized data, data are always complex,
# and should have shape (ntint, nchan, npol).
# For baseband data, we wish to get to the same shape for
Expand All @@ -277,6 +270,13 @@ def fold(fh, comm, samplerate, fedge, fedge_at_top, nchan,
raise TypeError("Can no longer deal with scipy's format "
"for storing FTs of real data.")

if fedge_at_top:
# take complex conjugate to ensure by-channel de-dispersion is
# applied correctly.
# This needs to be done for ARO data, since we are in 2nd Nyquist
# zone; not clear it is needed for other telescopes.
np.conj(vals, out=vals)

# Now we coherently dedisperse, either all of it or by channel.
if need_fine_channels:
# for by_channel, we have vals.shape=(ntint, nchan, npol),
Expand Down
2 changes: 1 addition & 1 deletion scintellometry/io/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from .aro import AROdata
from .lofar import LOFARdata, LOFARdata_Pcombined
from .gmrt import GMRTPhasedData, GMRTRawDumpData
from .arochime import AROCHIMEData, AROCHIMERawData
from .arochime import AROCHIMEData, AROCHIMERawData, AROCHIMEInvPFB
from .dada import DADAData
from .mark4 import Mark4Data
from .mark5b import Mark5BData
Expand Down
96 changes: 95 additions & 1 deletion scintellometry/io/arochime.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@

from . import SequentialFile, header_defaults

from scintellometry.ppf import pfb
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Tiny, but use from ..ppf import pfb


class AROCHIMEData(SequentialFile):

Expand Down Expand Up @@ -167,6 +168,99 @@ def open(self, number=0):
return self.fh_raw


class AROCHIMEInvPFB(SequentialFile):

telescope = 'arochime-invpfb'

def __init__(self, raw_files, blocksize, samplerate, fedge, fedge_at_top,
time_offset=0.0*u.s, dtype='4bit,4bit', comm=None):
"""ARO data acquired with a CHIME correlator, PFB inverted.

The PFB inversion is imperfect at the edges. To do this properly,
need to read in multiple blocks at a time (not currently implemented).

Also, this will ideally be read as sets of 2048 samples
(ie: read as dtype (2048,)4bit: 1024)
"""

self.fh_raw = AROCHIMERawData(raw_files, blocksize, samplerate,
fedge, fedge_at_top, time_offset,
dtype='cu4bit,cu4bit', comm=comm)
self.time0 = self.fh_raw.time0
self.samplerate = self.fh_raw.samplerate
self.dtsample = (1 / self.samplerate).to(u.s)
self.npol = self.fh_raw.npol
self.fedge = self.fh_raw.fedge
self.fedge_at_top = self.fh_raw.fedge_at_top
super(AROCHIMEInvPFB, self).__init__(raw_files, blocksize, dtype,
nchan=1, comm=comm)
# PFB information
self.nblock = 2048
self.h = pfb.sinc_hamming(4, self.nblock).reshape(4, -1)
self.fh = None

# S/N for use in the Wiener Filter
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you copy the comments from my test script here? (I.e., the ones which say why this is the correct S/N)

# Assume 8 bits are set to have noise at 3 bits, so 1.5 bits for FT.
# samples off by uniform distribution of [-0.5, 0.5] ->
# equivalent to adding noise with std=0.2887
prec = (1 << 3) ** 0.5
self.sn = prec / 0.2887
print("Opened InvPFB reader, S/N={0}".format(self.sn))

def open(self, number=0):
self.fh_raw.open(number)

def close(self):
self.fh_raw.close()

def _seek(self, offset):
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The more I thought about this, the less I liked it... We're not going to where we said we would, and this causes problems below with raw_start, raw_end. I think the easiest solution is to keep self.recordsize consistent with the raw one. This implies that although we have a full stream of samples, that the folding code should think of it as sets of 2048 samples. I think that all that would be needed to get this working is to change the dtype to an equivalent of 2048u4bit...

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

With that choice, one would redefine

def _seek(self, offset):
    self.fh_raw._seek(offset)

if offset % self.recordsize != 0:
raise ValueError("Cannot offset to non-integer number of records")

self.offset = (offset // self.fh_raw.recordsize *
self.fh_raw.recordsize)

def seek_record_read(self, offset, size):
print('Inverting PFB... ')
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you add a comment or docstring here, which described what is done, and also notes that with the present implementation, there are some edge effects? (i.e., that to do it right, one would need to read in more than requested).

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Now that we are moving out of testing mode, I think this print statement should be removed (or have some test on verbose, although I guess one does not have access to that here)

raw_start = (offset // self.fh_raw.recordsize) * self.fh_raw.recordsize
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

With the above change, we would not have to calculate raw_start, etc., any more, but could just use offset and size directly.

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since the code essentially doesn't work if offset and size are not integer multiples of self.fh_raw.recordsize, I would just add tests that both are consistent, and remove the raw_start, raw_end usage.

raw_end = ((offset + size + self.fh_raw.recordsize - 1) //
self.fh_raw.recordsize) * self.fh_raw.recordsize
raw = self.fh_raw.seek_record_read(raw_start, raw_end-raw_start)

if self.npol == 2:
raw = raw.view(raw.dtype.fields.values()[0][0])

raw = raw.reshape(-1, self.fh_raw.nchan, self.npol)

nyq_pad = np.zeros((raw.shape[0], 1, self.npol), dtype=raw.dtype)
raw = np.concatenate((raw, nyq_pad), axis=1)
# Get pseudo-timestream
print('Getting pseudo timestream')
pd = np.fft.irfft(raw, axis=1)
# Set up for deconvolution
print('Setting up for deconvolution')
fpd = np.fft.rfft(pd, axis=0)
del pd
if self.fh is None or self.fh.shape[0] != fpd.shape[0]:
print('Getting FT of PFB')
lh = np.zeros((raw.shape[0], self.h.shape[1]))
lh[:self.h.shape[0]] = self.h
self.fh = np.fft.rfft(lh, axis=0).conj()
del lh
# FT of Wiener deconvolution kernel
fg = self.fh.conj() / (np.abs(self.fh)**2 + (1/self.sn)**2)
# Deconvolve and get deconvolved timestream
print('Wiener deconvolving')
rd = np.fft.irfft(fpd * fg[..., np.newaxis],
axis=0).reshape(-1, self.npol)
# select actual part requested
print('Selecting and returning')
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This would also be removed.

rd = rd[offset - raw_start:offset - raw_start + size]
self.offset = offset + size
# view as a record array
return rd.astype('f4').view('f4,f4')


# GMRT defaults for psrfits HDUs
# Note: these are largely made-up at this point
header_defaults['arochime'] = {
Expand Down Expand Up @@ -199,7 +293,7 @@ def open(self, number=0):


header_defaults['arochime-raw'] = header_defaults['arochime']

header_defaults['arochime-invpfb'] = header_defaults['arochime']

def read_start_time(filename):
"""
Expand Down
6 changes: 3 additions & 3 deletions scintellometry/io/filehandlers.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,9 @@

# size in bytes of records read from file (simple for ARO: 1 byte/sample)
def dtype_itemsize(dtype):
bps = {'ci1': 2, '(2,)ci1': 4, 'ci1,ci1': 4,
'1bit': 0.125, '2bit': 0.25, '4bit': 0.5, 'c4bit': 1,
'cu4bit,cu4bit': 2}.get(dtype, None)
bps = {'ci1': 2, '(2,)ci1': 4, 'ci1,ci1': 4, '1bit': 0.125,
'2bit': 0.25, '4bit': 0.5, '4bit,4bit': 1, '(2048,)4bit': 1024,
'c4bit': 1, 'cu4bit,cu4bit': 2}.get(dtype, None)
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would undo this change, since we are no longer needing it.

if bps is None:
bps = np.dtype(dtype).itemsize
return bps
Expand Down
1 change: 1 addition & 0 deletions scintellometry/meta/observations.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,7 @@ def dada_files(file_fmt, fnbase, key, first, **kwargs):
'gmrt-raw': gmrt_rawfiles,
'arochime': vlbi_files,
'arochime-raw': vlbi_files,
'arochime-invpfb': vlbi_files,
'dada': dada_files,
'jbdada': dada_files,
'mark4': vlbi_files,
Expand Down
89 changes: 89 additions & 0 deletions scintellometry/ppf/pfb.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
import numpy as np
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Add a comment here to describe what this file is about, and where it came from.


def sinc_window(ntap, lblock):
"""Sinc window function.

Parameters
----------
ntaps : integer
Number of taps.
lblock: integer
Length of block.

Returns
-------
window : np.ndarray[ntaps * lblock]
"""
coeff_length = np.pi * ntap
coeff_num_samples = ntap * lblock

# Sampling locations of sinc function
X = np.arange(-coeff_length / 2.0, coeff_length / 2.0,
coeff_length / coeff_num_samples)

# np.sinc function is sin(pi*x)/pi*x, not sin(x)/x, so use X/pi
return np.sinc(X / np.pi)


def sinc_hamming(ntap, lblock):
"""Hamming-sinc window function.

Parameters
----------
ntaps : integer
Number of taps.
lblock: integer
Length of block.

Returns
-------
window : np.ndarray[ntaps * lblock]
"""

return sinc_window(ntap, lblock) * np.hamming(ntap * lblock)


def pfb(timestream, nfreq, ntap=4, window=sinc_hamming):
"""Perform the CHIME PFB on a timestream.

Parameters
----------
timestream : np.ndarray
Timestream to process
nfreq : int
Number of frequencies we want out (probably should be odd
number because of Nyquist)
ntaps : int
Number of taps.

Returns
-------
pfb : np.ndarray[:, nfreq]
Array of PFB frequencies.
"""

# Number of samples in a sub block
lblock = 2 * (nfreq - 1)

# Number of blocks
nblock = timestream.size // lblock - (ntap - 1)

# Initialise array for spectrum
spec = np.zeros((nblock, nfreq), dtype=np.complex128)

# Window function
w = window(ntap, lblock)

# Iterate over blocks and perform the PFB
for bi in range(nblock):
# Cut out the correct timestream section
ts_sec = timestream[(bi*lblock):((bi+ntap)*lblock)].copy()

# Perform a real FFT (with applied window function)
ft = np.fft.rfft(ts_sec * w)

# Choose every n-th frequency
spec[bi] = ft[::ntap]

return spec