root/trunk/src/analysis.py

Revision 488, 18.7 KB (checked in by mpereira, 7 months ago)

corrected bug with units of analysis.make_kernel

Line 
1# -*- coding: utf8 -*-
2"""
3NeuroTools.analysis
4==================
5
6A collection of analysis functions that may be used by NeuroTools.signals or other packages.
7
8Classes
9-------
10
11TuningCurve - A tuning curve object (not very documented)
12
13
14Functions
15---------
16
17ccf                       - fast cross correlation function based on fft
18crosscorrelate            - cross-correlation between two series of discrete
19                            events (e.g. spikes)
20simple_frequency_spectrum - Simple frequencxy spectrum
21arrays_almost_equal       - comparison of two arrays
22make_kernel               - creates kernel functions for convolution
23"""
24
25import os
26
27import numpy
28
29from NeuroTools import check_dependency
30
31HAVE_PYLAB = check_dependency('pylab')
32HAVE_MATPLOTLIB = check_dependency('matplotlib')
33if HAVE_PYLAB:
34    import pylab
35else:
36    PYLAB_ERROR = "The pylab package was not detected"
37if not HAVE_MATPLOTLIB:
38    MATPLOTLIB_ERROR = "The matplotlib package was not detected"
39
40
41def arrays_almost_equal(a, b, threshold):
42    return (abs(a-b) < threshold).all()
43
44def ccf(x, y, axis=None):
45    """
46    Computes the cross-correlation function of two series x and y.
47    Note that the computations are performed on anomalies (deviations from
48    average).
49    Returns the values of the cross-correlation at different lags.
50       
51    Inputs:
52        x    - 1D MaskedArray of a Time series.
53        y    - 1D MaskedArray of a Time series.
54        axis - integer *[None]* Axis along which to compute (0 for rows, 1 for cols).
55               If `None`, the array is flattened first.
56   
57    Examples:
58        >> z= arange(1000)
59        >> ccf(z,z)
60
61    """
62    assert x.ndim == y.ndim, "Inconsistent shape !"
63#    assert(x.shape == y.shape, "Inconsistent shape !")
64    if axis is None:
65        if x.ndim > 1:
66            x = x.ravel()
67            y = y.ravel()
68        npad = x.size + y.size
69        xanom = (x - x.mean(axis=None))
70        yanom = (y - y.mean(axis=None))
71        Fx = numpy.fft.fft(xanom, npad, )
72        Fy = numpy.fft.fft(yanom, npad, )
73        iFxy = numpy.fft.ifft(Fx.conj()*Fy).real
74        varxy = numpy.sqrt(numpy.inner(xanom,xanom) * numpy.inner(yanom,yanom))
75    else:
76        npad = x.shape[axis] + y.shape[axis]
77        if axis == 1:
78            if x.shape[0] != y.shape[0]:
79                raise ValueError, "Arrays should have the same length!"
80            xanom = (x - x.mean(axis=1)[:,None])
81            yanom = (y - y.mean(axis=1)[:,None])
82            varxy = numpy.sqrt((xanom*xanom).sum(1) * (yanom*yanom).sum(1))[:,None]
83        else:
84            if x.shape[1] != y.shape[1]:
85                raise ValueError, "Arrays should have the same width!"
86            xanom = (x - x.mean(axis=0))
87            yanom = (y - y.mean(axis=0))
88            varxy = numpy.sqrt((xanom*xanom).sum(0) * (yanom*yanom).sum(0))
89        Fx = numpy.fft.fft(xanom, npad, axis=axis)
90        Fy = numpy.fft.fft(yanom, npad, axis=axis)
91        iFxy = numpy.fft.ifft(Fx.conj()*Fy,n=npad,axis=axis).real
92    # We juste turn the lags into correct positions:
93    iFxy = numpy.concatenate((iFxy[len(iFxy)/2:len(iFxy)],iFxy[0:len(iFxy)/2]))
94    return iFxy/varxy
95
96from NeuroTools.plotting import get_display, set_labels
97
98HAVE_PYLAB = check_dependency('pylab')
99
100
101def crosscorrelate(sua1, sua2, lag=None, n_pred=1, predictor=None,
102                   display=False, kwargs={}):
103    """
104    Calculates the cross-correlation between two vectors containing event times.
105    Returns (int, int_, norm). See below for details.
106   
107    Adapted from original script written by Martin P. Nawrot for the FIND MATLAB
108    toolbox.
109    See FIND - a unified framework for neural data analysis,
110        Meier R, Egert U, Aertsen A, Nawrot MP; Neural Netw. 2008 Oct;
111        21(8):1085-93.
112     
113    Inputs:
114        sua1      - array of event times. Can be either a column/row vector or a
115                    member of the SpikeTrain class.
116        sua2      - array of event times. Can be either a column/row vector or a
117                    member of the SpikeTrain class.
118                    If sua2 == sua1 the result is the
119                    autocorrelogram.
120        lag       - the max. lag for which relative event timing is considered
121                    with a max. difference of +/- lag. A default lag is computed
122                    from the inter-event interval of the longer of the two sua
123                    arrays
124        n_pred    - number of surrogate compilations for the predictor. This
125                    influences the total length of the predictor output array
126                    int_
127        predictor - string array determines the type of bootstrap predictor to
128                    be used:
129                        shuffle - shuffles inter-event intervals of the longer
130                                  input array and calculates relative
131                                  differences with the shorter input array.
132                                  n_pred determines the number of repeated
133                                  shufflings, resulting differences are pooled
134                                  from all repeated shufflings
135        display   - if True the corresponding plots will be displayed. If False,
136                    int, int_ and norm will be returned.
137                    when display = True and n_pred > 1, the averaged predictor
138                    will be plotted.
139        kwargs    - arguments to be passed to numpy.histogram.
140
141    Outputs:
142        int  - accumulated differences of events in sua1 minus the events in
143               sua2. Thus positive values of int relate to events of sua2 that
144               lead events of sua1. Units are the same as the input arrays.
145        int_ - predictor: accumulated differences based on the prediction
146               method. The length of int_ is n_pred * length(int).  Units are
147               the same as the input arrays.
148        norm - normalization factor used to scale the bin heights in int and
149               int_. int/norm and int_/norm correspond to the linear
150               correlation coefficient.
151   
152    Examples:
153        >> crosscorrelate(numpy_array1, numpy_array2)
154        >> crosscorrelate(spike_train1, spike_train2)
155        >> crosscorrelate(spike_train1, spike_train2, lag = 150.0)
156        >> crosscorrelate(spike_train1, spike_train2, display=True,
157                          kwargs={'bins':100})
158           
159    See also:
160        ccf
161    """   
162    assert predictor is 'shuffle' or predictor is None, "predictor must be \
163    either None or 'shuffle'. Other predictors are not yet implemented."
164   
165    #Check whether sua1 and sua2 are SpikeTrains or arrays
166    sua = []
167    for x in (sua1, sua2):
168        #if isinstance(x, SpikeTrain):
169        if hasattr(x, 'spike_times'):
170            sua.append(x.spike_times)
171        elif x.ndim == 1:
172            sua.append(x)
173        elif x.ndim == 2 and (x.shape[0] == 1 or x.shape[1] == 1):
174            sua.append(x.ravel())
175        else:
176            raise TypeError("sua1 and sua2 must be either instances of the" \
177                            "SpikeTrain class or column/row vectors")
178    sua1 = sua[0]
179    sua2 = sua[1]
180   
181    if sua1.size < sua2.size:
182        if lag is None:
183            lag = numpy.ceil(10*numpy.mean(numpy.diff(sua1)))
184        reverse = False
185    else:
186        if lag is None:
187            lag = numpy.ceil(20*numpy.mean(numpy.diff(sua2)))
188        sua1, sua2 = sua2, sua1
189        reverse = True
190           
191    #construct predictor
192    if predictor is 'shuffle':
193        isi = numpy.diff(sua2)
194        sua2_ = numpy.array([])
195        for ni in xrange(1,n_pred+1):
196            idx = numpy.random.permutation(isi.size-1)           
197            sua2_ = numpy.append(sua2_, numpy.add(numpy.insert(
198                (numpy.cumsum(isi[idx])), 0, 0), sua2.min() + (
199                numpy.random.exponential(isi.mean()))))
200           
201    #calculate cross differences in spike times
202    int = numpy.array([])
203    int_ = numpy.array([])
204    for k in xrange(0, sua1.size):
205        int = numpy.append(int, sua1[k] - sua2[numpy.nonzero(
206            (sua2 > sua1[k] - lag) & (sua2 < sua1[k] + lag))])
207    if predictor == 'shuffle':
208        for k in xrange(0, sua1.size):
209            int_ = numpy.append(int_, sua1[k] - sua2_[numpy.nonzero(
210                (sua2_ > sua1[k] - lag) & (sua2_ < sua1[k] + lag))])
211    if reverse is True:
212        int = -int
213        int_ = -int_
214       
215    norm = numpy.sqrt(sua1.size * sua2.size)
216   
217    # Plot the results if display=True   
218    if display:
219        subplot = get_display(display)
220        if not subplot or not HAVE_PYLAB:
221            return int, int_, norm
222        else:
223            # Plot the cross-correlation
224            try:
225                counts, bin_edges = numpy.histogram(int, **kwargs)
226                edge_distances = numpy.diff(bin_edges)
227                bin_centers = bin_edges[1:] - edge_distances/2
228                counts = counts / norm
229                xlabel = "Time"
230                ylabel = "Cross-correlation coefficient"
231                #NOTE: the x axis corresponds to the upper edge of each bin
232                subplot.plot(bin_centers, counts, label='cross-correlation', color='b')
233                if predictor is None:
234                    set_labels(subplot, xlabel, ylabel)
235                    pylab.draw()
236                elif predictor is 'shuffle':           
237                    # Plot the predictor
238                    norm_ = norm * n_pred
239                    counts_, bin_edges_ = numpy.histogram(int_, **kwargs)
240                    counts_ = counts_ / norm_
241                    subplot.plot(bin_edges_[1:], counts_, label='predictor')
242                    subplot.legend()
243                    pylab.draw()
244            except ValueError:
245                print "There are no correlated events within the selected lag"\
246                " window of %s" % lag
247    else:
248        return int, int_, norm
249   
250def _dict_max(D):
251    """
252    For a dict containing numerical values, contain the key for the
253    highest value. If there is more than one item with the same highest
254    value, return one of them (arbitrary - depends on the order produced
255    by the iterator).
256    """
257    max_val = max(D.values())
258    for k in D:
259        if D[k] == max_val:
260            return k
261       
262def make_kernel(form, sigma, time_stamp_resolution, direction=1):
263    """
264    Constructs a numeric linear convolution kernel of basic shape to be used
265    for data smoothing (linear low pass filtering) and firing rate estimation
266    from single trial or trial-averaged spike trains.
267   
268    Exponential and alpha kernels may also be used to represent postynaptic
269    currents / potentials in a linear (current-based) model.
270   
271    Adapted from original script written by Martin P. Nawrot for the FIND MATLAB
272    toolbox.
273    See FIND - a unified framework for neural data analysis,
274        Meier R, Egert U, Aertsen A, Nawrot MP; Neural Netw. 2008 Oct;
275        21(8):1085-93.
276       
277        Nawrot M, Aertsen A, Rotter S (1999)
278        Single-trial estimation of neuronal firing rates - From single neuron
279        spike trains to population activity.
280        J Neurosci Meth 94: 81-92
281       
282    Inputs:
283        form                  - kernel form (string) Currently implemented forms
284                                are BOX - boxcar, TRI - triangle, GAU - gaussian
285                                , EPA - epanechnikov, EXP - exponential, ALP -
286                                alpha function.
287                                EXP and ALP are aymmetric kernel forms and
288                                assume optional parameter 'direction'
289        sigma                 - standard deviation of the distribution
290                                associated with kernel shape. This parameter
291                                defines the time resolution of the kernel
292                                estimate and makes different kernels comparable
293                                (cf. [1] for symetric kernels). This is used
294                                here as an alternative definition to the cut-off
295                                frequency of the associated linear filter.
296        time_stamp_resolution - temporal resolution of input and output in ms
297        direction             - Asymmetric kernels have two possible directions.
298                                The values are -1 or 1, default is 1. The
299                                definition here is that for direction = 1 the
300                                kernel represents the impulse response function
301                                of the linear filter. Default value is 1.
302   
303    Outputs:
304        kernel  - array of kernel. The length of this array is always an odd
305                  number to represent symmetric kernels such that the center bin
306                  coincides with the median of the numeric array, i.e for a
307                  triangle, the maximum will be at the center bin with equal
308                  number of bins to the right and to the left.
309        norm    - for rate estimates. The kernel vector is normalized such that
310                  the sum of all entries equals unity sum(kernel)=1. When
311                  estimating rate functions from discrete spike data (0/1) the
312                  additional parameter 'norm' allows for the normalization to
313                  rate in spikes per second.
314                  Use: rate = norm * scipy.signal.lfilter(kernel, 1, spike_data)
315        m_idx   - index of the numerically determined median (center of gravity)
316                  of the kernel function
317                 
318    Further comments:
319   
320    Assume matrix X of n spike trains represented as binary vector (0/1).
321   
322    To obtain single trial rate function of trial one should use
323        r = norm * scipy.signal.fftconvolve(sua, kernel)
324    To obtain trial-averaged spike train one should use
325        r_avg = norm * scipy.signal.fftconvolve(sua, numpy.mean(X,1))
326       
327        where X is an array of shape (l,n), where n is the number of trials and
328        l the length of each trial
329   
330    Note that the output of scipy.signal.fftconvolve needs to trimmed acordingly
331    before being displayed. For more information see the source of the method
332    SpikeTrain.instantaneous_rate()
333   
334    See also:
335        SpikeTrain.instantaneous_rate, SpikeList.averaged_instantaneous_rate
336    """
337    assert form.upper() in ('BOX','TRI','GAU','EPA','EXP','ALP'), "form must \
338    be one of either 'BOX','TRI','GAU','EPA','EXP' or 'ALP'!"
339   
340    assert direction in (1,-1), "direction must be either 1 or -1"
341   
342    sigma = sigma / 1000. #convert to SI units
343   
344    time_stamp_resolution = time_stamp_resolution / 1000. #convert to SI units
345   
346    norm = 1./time_stamp_resolution
347
348    if form.upper() == 'BOX':
349        w = 2.0 * sigma * numpy.sqrt(3)
350        width = 2 * numpy.floor(w / 2.0 / time_stamp_resolution) + 1  # always odd number of bins
351        height = 1. / width
352        kernel = numpy.ones((1, width)) * height  # area = 1
353       
354    elif form.upper() == 'TRI':
355        w = 2 * sigma * numpy.sqrt(6)
356        halfwidth = numpy.floor(w / 2.0 / time_stamp_resolution)
357        trileft = numpy.arange(1, halfwidth + 2)
358        triright = numpy.arange(halfwidth, 0, -1)  # odd number of bins
359        triangle = numpy.append(trileft, triright)
360        kernel = triangle / triangle.sum()  # area = 1
361       
362    elif form.upper() == 'EPA':
363        w = 2.0 * sigma * numpy.sqrt(5)
364        halfwidth = numpy.floor(w / 2.0 / time_stamp_resolution)
365        base = numpy.arange(-halfwidth, halfwidth + 1)
366        parabula = base**2
367        epanech = parabula.max() - parabula  # inverse parabula
368        kernel = epanech / epanech.sum()  # area = 1
369       
370    elif form.upper() == 'GAU':
371        SI_sigma = sigma / 1000.0
372        w = 2.0 * sigma * 2.7  # > 99% of distribution weight
373        halfwidth = numpy.floor(w / 2.0 / time_stamp_resolution)  # always odd
374        base = numpy.arange(-halfwidth, halfwidth + 1) / 1000.0 * (
375            time_stamp_resolution)
376        g = numpy.exp(-(base**2) / 2.0 / SI_sigma**2) / SI_sigma / numpy.sqrt(
377            2.0 * numpy.pi)
378        kernel = g / g.sum()
379       
380    elif form.upper() == 'ALP':
381        SI_sigma = sigma / 1000.0
382        w = 5.0 * sigma
383        alpha = numpy.arange(1, (2.0 * numpy.floor(
384            (w / time_stamp_resolution / 2.0)) + 1) + 1) / 1000.0 * \
385        time_stamp_resolution
386        alpha = (2.0 / SI_sigma**2) * alpha * numpy.exp(-alpha * numpy.sqrt(2) \
387                                                        / SI_sigma)
388        kernel = alpha / alpha.sum()  # normalization
389        if direction == -1:
390            kernel = numpy.flipud(kernel)
391           
392    elif form.upper() == 'EXP':
393        SI_sigma = sigma / 1000.0
394        w = 5.0 * sigma
395        expo = numpy.arange(1, (2.0 * numpy.floor(w / time_stamp_resolution / (
396            2.0)) + 1) + 1) / 1000.0 * time_stamp_resolution
397        expo = numpy.exp(-expo / SI_sigma)
398        kernel = expo / expo.sum()
399        if direction == -1:
400            kernel = numpy.flipud(kernel)
401   
402    kernel = kernel.ravel()
403    m_idx = numpy.nonzero(kernel.cumsum() >= 0.5)[0].min()
404   
405    return kernel, norm, m_idx
406
407def simple_frequency_spectrum(x):
408    """
409    Very simple calculation of frequency spectrum with no detrending,
410    windowing, etc. Just the first half (positive frequency components) of
411    abs(fft(x))
412    """
413    spec = numpy.absolute(numpy.fft.fft(x))
414    spec = spec[:len(x)/2] # take positive frequency components
415    spec /= len(x)         # normalize
416    spec *= 2.0            # to get amplitudes of sine components, need to multiply by 2
417    spec[0] /= 2.0         # except for the dc component
418    return spec
419
420class TuningCurve(object):
421    """Class to facilitate working with tuning curves."""
422
423    def __init__(self, D=None):
424        """
425        If `D` is a dict, it is used to give initial values to the tuning curve.
426        """
427        self._tuning_curves = {}
428        self._counts = {}
429        if D is not None:
430            for k,v in D.items():
431                self._tuning_curves[k] = [v]
432                self._counts[k] = 1
433                self.n = 1
434        else:
435            self.n = 0
436
437    def add(self, D):
438        for k,in D.items():
439            self._tuning_curves[k].append(v)
440            self._counts[k] += 1
441        self.n += 1
442
443    def __getitem__(self, i):
444        D = {}
445        for k,v in self._tuning_curves[k].items():
446            D[k] = v[i]
447        return D
448   
449    def __repr__(self):
450        return "TuningCurve: %s" % self._tuning_curves
451
452    def stats(self):
453        """Return the mean tuning curve with stderrs."""
454        mean = {}
455        stderr = {}
456        n = self.n
457        for k in self._tuning_curves.keys():
458            arr = numpy.array(self._tuning_curves[k])
459            mean[k] = arr.mean()
460            stderr[k] = arr.std()*n/(n-1)/numpy.sqrt(n)
461        return mean, stderr
462
463    def max(self):
464        """Return the key of the max value and the max value."""
465        k = _dict_max(self._tuning_curves)
466        return k, self._tuning_curves[k]
Note: See TracBrowser for help on using the browser.