libftsh
A Fast Transform for Spherical Harmonics
 All Data Structures Files Functions Variables Defines
Data Structures | Defines | Functions
libftsh.h File Reference

Header for a library for doing the Fast Transform for Spherical Harmonics. More...

#include <math.h>
#include <stdlib.h>
#include <stdio.h>
#include <assert.h>
#include <string.h>
#include <time.h>
#include "fftw.h"
#include "rfftw.h"

Go to the source code of this file.

Data Structures

struct  Ftrigtw_Plan
 Holder for precomputed FFTW objects. More...
struct  Dense_Matrix
 An ordinary (one dimensional) matrix. More...
struct  Dense_Vector
 An ordinary (one dimensional) vector. More...
struct  Banded_Matrix
 A one dimensional sparse matrix, with banded structure. More...
struct  Bdmat_Op_Workspace
 Workspace for Banded_Matrix routines. More...
struct  Dyadic_Gsearch_Save
 Workspace for "best non-decreasing dyadic partition" search. More...
struct  Fast1_Save
 Extra things to save for fast1 algorithm. More...
struct  Local_Trig_Interval
 Information to do a local trigonometric transform on an interval. More...
struct  Pm_1D
 Hold a compressed matrix of Associate Legendre functions of a fixed order. More...
struct  Pm_Direct_bd
 Hold a banded matrix of Associate Legendre functions of a fixed order. More...
struct  Pm_Direct_d
 Hold a dense matrix of Associate Legendre functions of a fixed order. More...
struct  Pm_1D_Workspace
 Hold workspace and a few precomputed values for the fast synthesis/analysis. More...
struct  Pmncos_Recurrence_Save
 Hold data and workspace to do the three-term recurrence for a vector of sample points. More...

Defines

#define PI   3.14159265358979323846264338328
#define USE_double   1
#define REAL   double

Functions

Dense_Vector init_dvect_memory (unsigned int n)
void give_dvect_memory (Dense_Vector *current, Dense_Vector *previous)
Dense_Matrix init_dmat_memory (unsigned int n)
void give_dmat_memory (Dense_Matrix *current, Dense_Matrix *previous)
void free_dmat_memory (Dense_Matrix *in)
void dump_bin_dmat (Dense_Matrix *x, FILE *out)
void load_bin_dmat (Dense_Matrix *x, FILE *in)
void dmat_dvect_multiply (Dense_Vector *out, Dense_Matrix *inmat, Dense_Vector *invect)
void dmat_t_dvect_multiply (Dense_Vector *out, Dense_Matrix *inmat, Dense_Vector *invect)
Banded_Matrix init_bdmat_memory (int max_size, int max_rows, int max_bands)
void give_bdmat_memory (Banded_Matrix *current, Banded_Matrix *previous)
void free_bdmat_memory (Banded_Matrix *in)
void bdmat_copy (Banded_Matrix *out, Banded_Matrix *in)
void dump_bin_bdmat (Banded_Matrix *x, FILE *out)
void load_bin_bdmat (Banded_Matrix *x, FILE *in)
void bdmat_dvect_multiply (Dense_Vector *out, Banded_Matrix *inmat, Dense_Vector *invect)
void bdmat_t_dvect_multiply (Dense_Vector *out, Banded_Matrix *inmat, Dense_Vector *invect)
void dmat_to_bdmat (Banded_Matrix *out, Dense_Matrix *in, REAL cutoff, int min_gap)
int dyadic_gsearch (Dyadic_Gsearch_Save *dy_gs_save)
void dyadic_gsearch_init (Dyadic_Gsearch_Save *dy_gs_save, int num_levels)
void dyadic_gsearch_free (Dyadic_Gsearch_Save *dy_gs_save)
void fast1_fromfile (int *numpts_ptr, int *band_limit_ptr, REAL **node_ptr, REAL **weight_ptr, Pm_1D **pmn_matrix_ptr, Pm_1D_Workspace *pmnwork, FILE *fp)
void load_fast1 (int *numpts_ptr, int *band_limit_ptr, int *num_levels_ptr, REAL **node_ptr, REAL **weight_ptr, Fast1_Save *saves_fordump, Pm_1D **pmn_matrix_ptr, FILE *fp)
void direct1_fromfile (int *numpts_ptr, int *band_limit_ptr, REAL **node_ptr, REAL **weight_ptr, Pm_Direct_d **pmn_matrix_ptr, Pm_1D_Workspace *pmnwork, FILE *fp)
void load_direct1 (int *numpts_ptr, int *band_limit_ptr, REAL **node_ptr, REAL **weight_ptr, Pm_Direct_d **pmn_matrix_ptr, FILE *fp)
void dctw (REAL *fcn, Ftrigtw_Plan *main_plan, int dir)
void dctw_init (Ftrigtw_Plan *main_plan, int flags, int numpts)
void rfftw_to_NR_format (REAL *data_nr, REAL *data_rfftw, int numpts, int dir)
void lctw (REAL *fcn, Ftrigtw_Plan *main_plan)
void lctw_init (Ftrigtw_Plan *main_plan, int flags, int numpts)
REAL dualabell (REAL(*origbell)(int, int), int laplength, int t)
void bell_1size_init (REAL *bellout, REAL(*whichbell)(int, int), int lapleft, int center, int lapright, int dual_flag)
void fold_e_e_1s (REAL *coef, REAL *fcn, int numpts, REAL *left, int n_left, REAL *right, int n_right, REAL *bell, int dir)
void fold_e_o_1s (REAL *coef, REAL *fcn, int numpts, REAL *left, int n_left, REAL *right, int n_right, REAL *bell, int dir)
REAL Pmncos (int order, int index, REAL phi)
void Pmncos_resave_init (Pmncos_Recurrence_Save *pmn_save, REAL *node, int numpts)
void next_rtsinPmncos (REAL *outfcn, Pmncos_Recurrence_Save *pmn_save, int halfpts, int order, int index)
void Pmncos_resave_destroy (Pmncos_Recurrence_Save *pmn_save)
void direct1_tofile (int numpts, int band_limit, REAL *node, REAL *weight, FILE *fp)
void init_direct1 (Pm_Direct_d **out, int halfpts, int band_limit, REAL *node)
void dump_direct1 (int numpts, int band_limit, REAL *node, REAL *weight, Pm_Direct_d *pmn_matrix, FILE *fp)
REAL fast1_tofile (int numpts, int band_limit, REAL *node, REAL *weight, REAL epsilon, FILE *fp)
void init_fast1 (Pm_1D **out, Fast1_Save *saves_fordump, int halfpts, int band_limit, REAL *node, int *numlevels, REAL epsilon, REAL(*bell_choice)(int, int))
void dump_fast1 (int numpts, int band_limit, int num_levels, REAL *node, REAL *weight, Fast1_Save *saves_fordump, Pm_1D *pmn_matrix, FILE *fp)
void Pm_direct1_analyse (REAL *outcoef, Pm_Direct_d *inmat, REAL *infcn, Pm_1D_Workspace *pmnwork)
void Pm_direct1_synthesize (REAL *outfcn, Pm_Direct_d *inmat, REAL *incoef, Pm_1D_Workspace *pmnwork)
REAL bellmat1 (int laplength, int t)
REAL bellmat2 (int laplength, int t)
REAL bellmat3 (int laplength, int t)
REAL bellmat4 (int laplength, int t)
REAL bellmat5 (int laplength, int t)
REAL bellmat6 (int laplength, int t)
REAL bellmat7 (int laplength, int t)
REAL bellmat8 (int laplength, int t)
REAL bellmat9 (int laplength, int t)
REAL bellmat10 (int laplength, int t)
REAL bellmat11 (int laplength, int t)
REAL bellmat12 (int laplength, int t)
REAL bellmat13 (int laplength, int t)
REAL bellmat14 (int laplength, int t)
REAL bellmat15 (int laplength, int t)
REAL bellmat16 (int laplength, int t)
REAL bellmat17 (int laplength, int t)
REAL bellmat18 (int laplength, int t)
REAL bellmat19 (int laplength, int t)
REAL bellmat20 (int laplength, int t)
void Pm_1d_analyse (REAL *outcoef, Pm_1D *inmat, REAL *infcn, Pm_1D_Workspace *pmnwork)
void Pm_1d_synthesize (REAL *outfcn, Pm_1D *inmat, REAL *incoef, Pm_1D_Workspace *pmnwork)
void mk_gnode_gweight (REAL *gnode, REAL *gweight, REAL epsilon, int numpts)
void mk_cosi_gnode (REAL *gnode, REAL epsilon, int numpts)
void cosi_gnode_to_gweight (REAL *gweight, REAL *gnode, int numpts)
double second ()
char * getoption (const char *flag, int argc, char **argv)
void pointwise_multiply (REAL *out, REAL *in_1, REAL *in_2, int numpts)
void reflect_evenodd (REAL *even, REAL *odd, REAL *whole, int numpts, int dir)
void remove_rootsin (REAL *out, REAL *in, REAL *node, int numpts)
void dyadic_g_ilist_init (Local_Trig_Interval *ilist[2], int *ints_used, int *coefs_used, REAL **bell_handle, int *bell_length, int **bell_offsets, int halfpts, int num_levels, REAL(*bell_choice)(int, int), int flags)
void ltrigint_init (Local_Trig_Interval *out, int numpts, int left_extra, int right_extra, int fcn_offset, int coef_offset, REAL(*bell_choice)(int, int), int which_transform, int fftw_flags)
void gsearch_to_ilist (Local_Trig_Interval *ilist_out, int *intervals_kept, int max_intervals, Local_Trig_Interval *ilist_in, Dyadic_Gsearch_Save *dy_gs_save, int halfpts)
void evaluate_intlist (REAL *outfcn, REAL *incoef, Local_Trig_Interval *interval_list, int num_intervals)
void expand_d_intlist (REAL *outcoef, REAL *infcn, Local_Trig_Interval *interval_list, int num_intervals)
void expand_intlist (REAL *outcoef, REAL *infcn, Local_Trig_Interval *interval_list, int num_intervals)
int count_thresh (REAL *lambda, int howmany, REAL thresh)
void init_Pm_1d_workspace (Pm_1D_Workspace *out, int numpts, int band_limit, REAL *node, REAL *weight)
void ftgaqd (int *nlat, REAL *theta, REAL *wts, REAL *work, int *lwork, int *ierror)
void gnode_to_Pmninit (REAL ***sofar, REAL **ex, REAL **why, int **size, REAL *gnode, int hemipts)

Detailed Description

Header for a library for doing the Fast Transform for Spherical Harmonics.


Define Documentation

#define REAL   double

REAL is the chosen floating-point data type (float or double)

#define USE_double   1

USE_double sould be defined by a compiler flag

  • 0 for float
  • 1 for double

Function Documentation

void bdmat_copy ( Banded_Matrix out,
Banded_Matrix in 
)

This routine copies a Banded_Matrix.

INPUTS: in -- the Banded_Matrix to copy

OUTPUT: out -- the values of in are copied to out Should be initialized as follows

  • .max_rows is the amount of row memory available
  • .band_p points to .max_rows ints
  • .max_bands is the amount of band memory available
  • .band_list points to 3*.max_bands ints
  • .max_size is the amount of main memory available
  • .matrix points to .max_size REAL

Here is the caller graph for this function:

void bdmat_dvect_multiply ( Dense_Vector out,
Banded_Matrix inmat,
Dense_Vector invect 
)

This routine applies a Banded_Matrix to a Dense_Vector to produce a Dense_Vector. The matrix may be square or rectangular.

INPUTS:

  • out -- the Dense_Vector to put the result in. Should be initialized as follows:
    • .max_size is the amount of memory available.
    • .vector points to .max_size REAL
  • inmat -- the Banded_Matrix.
  • invect -- the Dense_Vector

OUTPUT: out is modified so out = inmat * invect

NOTES:

  • might want to hold some things as local registers, rather than repeatedly accessing arrays.
  • looping structure may be inefficient

Here is the caller graph for this function:

void bdmat_t_dvect_multiply ( Dense_Vector out,
Banded_Matrix inmat,
Dense_Vector invect 
)

This routine applies the TRANSPOSE of a Banded_Matrix to a Dense_Vector to produce a Dense_Vector. The matrix may be square or rectangular.

INPUTS:

  • out -- the Dense_Vector to put the result in. Should be initialized as follows
    • .max_size is the amount of memory available
    • .vector points to .max_size REAL
  • inmat -- the Banded_Matrix.
  • invect -- the Dense_Vector

OUTPUT: out is modified so out = TRANSPOSE(inmat) * invect

NOTES:

  • might want to hold some things as local registers, rather than repeatedly accessing arrays
  • looping structure may be inefficient

Here is the caller graph for this function:

void bell_1size_init ( REAL bellout,
REAL(*)(int, int)  whichbell,
int  lapleft,
int  center,
int  lapright,
int  dual_flag 
)

Computes bell values for a given size and overlap configuration.

INPUTS:

  • whichbell -- a pointer to a bell function. The bells have the form bell(laplength,t) with
    • laplength -- the number of POINTS the bell spills out of the interval. It also spills in an equal number of points. Thus the bell takes 2*laplength points to rise from 0 to 1.
    • t -- Which point you want to evaluate the bell at. This should range from 0 to 2*laplength-1. bell(laplength,0)=~0 then it rises until bell(laplength,2*laplength-1)=~1 The edge of the interval is between points, at t=(laplength-1) +0.5 The bell function returns a REAL which is the bell's value at that point.
  • lapleft -- the laplength to use at the left edge
  • center -- the main interval length. must be at least lapleft+lapright. If it is longer than this, the bell is given value 1 on the extra portion
  • lapright-- the laplength to use at the right edge
  • dual_flag -- a flag for if we should make the dual of the input bell. Use for inverting bi-orthogonal bells
    • 0 (or !=1) -- normal, don't dual
    • 1 -- make the dual

OUTPUT: bellout --is written onto. The first lapleft+center+lapright entries are used.

Generally, the bell will look like

                               ---------------------\               
                         _____/                      -\______       
           _________/----                                    - ___  
    ___/---                                                        \-
/--                                                                 \
|---------------|--------------|--------------------|--------|--------|
   lapleft      | lapleft                            lapright|lapright
                |- - - - - - - - - center - - - - - - - - - -|      

NOTES:

  • It is legal to have lapleft and or lapright equal to zero
  • We could call memory internally, but we allow the user to try to localize memory.

Here is the call graph for this function:

Here is the caller graph for this function:

void cosi_gnode_to_gweight ( REAL gweight,
REAL gnode,
int  numpts 
)

This routine computes the gaussian weights that go with a set of gaussian nodes.

The nodes input here are \(\cos^{-1}(\text{the usual nodes} )\).

INPUTS:

  • gweight -- the array in which to put the gaussian weights. Memory must already be allocated.
  • gnode -- the gausian nodes, theta values between 0 and PI This is \(\cos^{-1}(\text{the usual nodes} )\).
  • numpts -- the order of the weights we compute this is the length of gnode and gweight.

OUTPUTS: numpts values of gweight are filled in

NOTES:

  • We use the Christoffel function method, mainly because we can avoid derivatives.
  • Do I use normalized Pmn?

Here is the caller graph for this function:

int count_thresh ( REAL lambda,
int  howmany,
REAL  thresh 
)

This function counts how many members of lambda are above thresh in absolute value.

INPUTS:

  • lambda-- an array of REALs of length howmany
  • howmany-- the length of lambda
  • thresh -- the threshhold used in the cutoff

OUTPUT: -- returns the count

void dctw ( REAL fcn,
Ftrigtw_Plan main_plan,
int  dir 
)

Computes the disrete cosine transform of fcn and puts the coefficients back into fcn

This is the DCT at half integer points.

INPUTS:

  • fcn -- array containing the data
  • main_plan -- a structure holding precomputed trig functions, the plan used by fftw,and the number of points initialized by dctw_init
  • dir -- the direction to transform. 1 is expansion and -1 evaluation.

OUTPUT: fcn.

  • In forward mode (dir==1), puts the coefficients in fcn. \[f(k) = norm \sum_{j=0}^{numpts-1} f(j) \cos\left(\pi\frac{k(j+1/2)}{numpts}\right) \] set for $L^2$ normalization.
         numpts-1              k  (j+0.5)
    fcn[k]= SUM   fcn[j] cos( PI ----------------  ) *norm
           j=0                         numpts   
           set for L^2 normalization.
    
  • In reverse mode inverts this process.

NOTES:

  • comment better
  • requires even numpts. checked by dctw_init
  • precompute sqrt(2.0)?
  • slow and awkward

Here is the call graph for this function:

Here is the caller graph for this function:

void dctw_init ( Ftrigtw_Plan main_plan,
int  flags,
int  numpts 
)

This routine initializes for dctw

We initialize trigonometic things used by the lct and dct portions,
and call the initailizer for the fftw real transform (forward).

INPUTS: 
- main_plan -- a structure that will hold the things we generate
  Memory for its fields is allocated in this routine.
- flags -- flags passed directly to the rfftw initializer
- numpts -- the number of points on which we will transform
  MUST be even

OUTPUTS: main_plan is filled in:

NOTES:
- comment better

keep these always as double so we don't lose precision

Here is the caller graph for this function:

void direct1_fromfile ( int *  numpts_ptr,
int *  band_limit_ptr,
REAL **  node_ptr,
REAL **  weight_ptr,
Pm_Direct_d **  pmn_matrix_ptr,
Pm_1D_Workspace pmnwork,
FILE *  fp 
)

This routine loads the initialization dumped by dump_direct1, interprets it and does all the set-up necessary to do the synthesis or analysis transform.

INPUTS: fp -- the objects are read from the file pointed to by fp, in binary format. The file must be opened before calling this routine.

OUTPUTS:

  • numpts -- the number of points on the full interval.
  • band_limit -- the bound on the degree of the expansion. In \(P^m_n\) notation this bounds n.
  • node -- an array of length numpts with the nodes, usually Arcsin of the usual gaussian nodes.
  • weight -- an array of length numpts with the quadrature weights
  • pmn_matrix -- an array of band_limit (==order_limit) Pm_Direct_d structures, each representing the matrix for one m.
  • pmn_work -- a structure holding workspace and other precomputed values used by the transform, independent of m

NOTES:

  • node and weight are not needed for the synthesis/analysis routines but are provided here for other uses.

Here is the call graph for this function:

void direct1_tofile ( int  numpts,
int  band_limit,
REAL node,
REAL weight,
FILE *  fp 
)

This routine initializes the direct transform for spherical harmonics (version 1) and dumps it to a file.

INPUTS:

  • numpts -- the number of points on the full interval. Must be divisible by 2.
  • band_limit -- the bound on the degree of the expansion. In \(P^m_n\) notation this bounds n.
  • node -- an array of length numpts with the nodes, usually Arcsin of the usual gaussian nodes.
  • weight -- an array of length numpts with the quadrature weights

OUTPUTS: fp - the objects created are written onto the file pointed to by fp, in binary format. The file must be opened before calling this routine.

NOTES:

  • node and weight are input so the user can select.

Here is the call graph for this function:

void dmat_dvect_multiply ( Dense_Vector out,
Dense_Matrix inmat,
Dense_Vector invect 
)

Apply a matrix to a vector to get a new vector. The matrix is in Dense_Matrix format, and the vectors in Dense_Vector format.

INPUTS:

  • out -- the Dense_Vector to put the result in. Should be initialized as follows:
    • .max_size is the amount of memory available.
    • .vector points to .max_size REAL
  • invect -- an input vector in Dense_Vector format
  • inmat -- the input matrix in Dense_Matrix format

OUTPUT: out -- inmat * invect = out

NOTES:

  • ordering of loops not optimized
  • ? use registers for n_row n_col?
  • blocking could help speed

Here is the caller graph for this function:

void dmat_t_dvect_multiply ( Dense_Vector out,
Dense_Matrix inmat,
Dense_Vector invect 
)

Apply the TRANSPOSE of a matrix to a vector to get a new vector. The matrix is in Dense_Matrix format, and the vectors in Dense_Vector format.

INPUTS:

  • out -- the Dense_Vector to put the result in. Should be initialized as follows:
    • .max_size is the amount of memory available.
    • .vector points to .max_size REAL
  • invect -- an input vector in Dense_Vector format
  • inmat -- the input matrix in Dense_Matrix format

OUTPUT: out -- TRANSPOSE(inmat) * invect = out

NOTES:

  • ordering of loops not optimized
  • blocking may improve speed

Here is the caller graph for this function:

void dmat_to_bdmat ( Banded_Matrix out,
Dense_Matrix in,
REAL  cutoff,
int  min_gap 
)

Convert a matrix in Dense_Matrix form to Banded_Matrix form.

INPUT:

  • out -- Where to put the result in Banded_Matrix format. Should be initialized as follows
    • .max_rows is the amount of row memory available
    • .band_p points to .max_rows ints +.max_bands is the amount of band memory available
    • .band_list points to 3*.max_bands ints
    • .max_size is the amount of main memory available
    • .matrix points to .max_size REAL
  • in -- the Dense_Matrix
  • cutoff -- the truncation threshold to use in the conversion. Entries less than cutoff are discarded. To disable truncation, set cutoff <0 .
  • min_gap -- the minimal allowed gap between bands. Coefficients less than cutoff are retained if removing them makes a gap of less than min_gap. To disable, set to 0.

OUTPUT: out

NOTES:

  • not checking for sufficient memory

Here is the caller graph for this function:

REAL dualabell ( REAL(*)(int, int)  origbell,
int  laplength,
int  t 
)

Return the value of the dual to the given bell.

INPUTS:

  • origbell - pointer to the original bell, e.g. bellmat5.
  • laplength-- the number of POINTS the bell spills out of the interval. It also spills in an equal number of points. Thus the bell takes 2*laplength points to rise from 0 to 1.
  • t -- Which point you want to evaluate the bell at. This should range from 0 to 2*laplength-1. bell(laplength,0)=~0 then it rises until bell(laplength,2*laplength-1)=~1. The edge of the interval is between points, at t=(laplength-1) +0.5

OUTPUT: return the REAL value at that point.

Here is the caller graph for this function:

void dump_bin_bdmat ( Banded_Matrix x,
FILE *  out 
)

This routine dumps a Banded_Matrix to a file in binary format.

INPUTS:

  • x -- the Banded_Matrix
  • out -- A pointer to the file to write into. Setting this to stdout makes it write to the screen. If out==NULL we use stdout.

OUTPUT: writes into the file given by out. The writing mode ("wb" or "ab") is determined outside of this routine.

Here is the caller graph for this function:

void dump_bin_dmat ( Dense_Matrix x,
FILE *  out 
)

This routine dumps a Dense_Matrix to a file in binary format.

This is efficient, but not human readable.

INPUTS:

  • x -- the Dense_Matrix
  • out -- A pointer to the file to write into. Setting this to stdout makes it write to the screen. If out==NULL we use stdout.

OUTPUT: writes into the file given by out. The writing mode ("wb" or "ab") is determined outside of this routine.

Here is the caller graph for this function:

void dump_direct1 ( int  numpts,
int  band_limit,
REAL node,
REAL weight,
Pm_Direct_d pmn_matrix,
FILE *  fp 
)

This routine dumps the initialization constructed by init_direct1 and general parameters for the direct transform for spherical harmonics, dense version.

INPUTS:

  • numpts -- the number of points on the full interval.
  • band_limit -- the bound on the degree of the expansion. In \(P^m_n\) notation this bounds n.
  • node -- an array of length numpts with the nodes, usually Arcsin of the usual gaussian nodes.
  • weights -- an array of length numpts with the quadrature weights
  • pmn_matrix -- an array of band_limit (==order_limit) Pm_Direct_d structures, each representing the matrix for one m.

OUTPUTS: fp -- the objects above are written into the file pointed to by fp, in binary format. The file must be opened before calling this routine.

Here is the call graph for this function:

Here is the caller graph for this function:

void dump_fast1 ( int  numpts,
int  band_limit,
int  num_levels,
REAL node,
REAL weight,
Fast1_Save saves_fordump,
Pm_1D pmn_matrix,
FILE *  fp 
)

This routine dumps the initialization constructed by init_fast1 and general parameters for the fast transform for spherical harmonics, version 1.

INPUTS:

  • numpts -- the number of points on the full interval.
  • band_limit -- the bound on the degree of the expansion. In \(P^m_n\) notation this bounds n.
  • num_levels -- the number of dyadic levels used in the adaption
  • node -- an array of length numpts with the nodes, usually Arcsin of the usual gaussian nodes.
  • weights -- an array of length numpts with the quadrature weights
  • saves_fordump -- a structure containing the information needed for the bells. Constructed by init_fast1
  • pmn_matrix -- an array of band_limit (==order_limit) Pm_1D structures, each representing the compressed matrix for one m.

OUTPUTS: fp -- the objects above are written into the file pointed to by fp, in binary format. The file must be opened before calling this routine.

Here is the call graph for this function:

Here is the caller graph for this function:

void dyadic_g_ilist_init ( Local_Trig_Interval ilist[2],
int *  ints_used,
int *  coefs_used,
REAL **  bell_handle,
int *  bell_length,
int **  bell_offsets,
int  halfpts,
int  num_levels,
REAL(*)(int, int)  bell_choice,
int  flags 
)

This routine constructs the interval list for a dyadic partition suitable for the growing interval size search (gsearch).

INPUTS:

  • ilist -- an array of 2 pointers to Local_Trig_Interval, to be filled in.
  • halfpts -- the number of points on which to construct these intervals. For the growing search, this usually represents half of the number of points in the full interval.
  • num_levels -- the number of dyadic levels to use. We need halfpts >= (1<<num_levels) and halfpts % (1<<num_levels) == 0
  • bell_choice-- the bell function to use
  • flags -- integer flags passed to the fftw initializer

OUTPUTs:

  • ilist[0/1] -- has memory allocated and is filled in
    • ilist[0] -- gets intervals with trig functions for even parity at the right edge
    • ilist[1] -- gets intervals with trig functions for odd parity at the right edge
  • ints_used -- is filled in with the length of the ilist's
  • coefs_used -- is filled in with the total number of possible coefficients this ilist can produced. This is used to allocate memory and set offsets within whatever array will eventually hold these coefficients
  • bell_handle -- is pointed to an array which contains all the bell values. This is used when we want to save the bells to disc efficiently.
  • bell_length -- is set to the length of bell_handle.
  • bell_offsets -- is pointed to an array containing integer offsets withing bell_handle, indicating the location of particular bells.

NOTES:

  • coefs_used includes offsets for the left edge asymmetric bells, which are not used.
  • We now use the bell memory efficiently, with no repeated computations. For the trig transforms, we do not repeat the trig value or fftw computations. There is an inefficiency in that we have multiple versions of the workspace. This adds trivial memory, but may have cache implications
  • There was a mysterious segmentation fault possibly related to free(plan_holder) so that is now disabled. This may cause a memory leak.

set which_transform

Here is the call graph for this function:

Here is the caller graph for this function:

int dyadic_gsearch ( Dyadic_Gsearch_Save dy_gs_save)

This routine implements a `best non-decreasing dyadic partition' search. It finds the partition with lowest cost under the restriction that an interval's right neighbor is either the same size or one size (2x) larger. Generally this is to be used with local cosine. When an interval's left neighbor is the same size, we use a regular full sized bell and put the cost of this expansion in .cost_full[][]. When the left neighbor is smaller we shrink the left edge of the bell, making it asymmetric and yielding .cost_asym[][].

The search tree is grown from the left, with an interval having children its allowed right neighbors. The cost of a partition is the cost of a path from the left edge to the right edge. Since each interval has two costs, the path into an interval is also important.

   | _--------------O                   |        
   |/                                   |                     
  /|  _-----O-------|--------->O        |                       
 O-|-/           _--|--------/          |
  \|       |    /   |         |         |                     
   |~--O---|-->O----|--->O----|---->O   |                        

The generic decision step is: assume the current node's children contain the minimal cost of a path from them to the right. Compare these costs and ADD the smaller to the full and asym costs of the current node. We process intervals in order of the x-coordinate of their center, starting at the right.

INPUTS: dy_gs_save -- a structure holding memory for the costs, branching and ordering of interval. See the declaration of the structure Dyadic_Gsearch_Save for the full description of its fields.

OUTPUTS: returns the minimal cost, and also stores it in dy_gs_save.costfull[dy_gs_save.best_level][0]. To determine the partition chosen, start at [best_level][0] and follow .branching[][], which has values one of {RIGHTEDGE=0, SAMESIZE=1, UPSIZE=2}

Here is the caller graph for this function:

void dyadic_gsearch_free ( Dyadic_Gsearch_Save dy_gs_save)

This function frees the structure Dyadic_Gsearch_Save used by dyadic_gsearch and initialized by dyadic_gsearch_init.

INPUTS: dy_gs_save -- a pointer to the structure to free

OUTPUT: none

Here is the caller graph for this function:

void dyadic_gsearch_init ( Dyadic_Gsearch_Save dy_gs_save,
int  num_levels 
)

This function initializes the structure Dyadic_Gsearch_Save for use by dyadic_gsearch. Memory can be freed by dyadic_gsearch_free.

INPUTS:

  • dy_gs_save -- a pointer to the structure to fill in
  • num_levels -- the number of dyadic levels to use

OUTPUT: dy_gs_save -- memory is allocated and some things filled in.

Here is the caller graph for this function:

void evaluate_intlist ( REAL outfcn,
REAL incoef,
Local_Trig_Interval interval_list,
int  num_intervals 
)

This routine does a LCT/DCT evaluation based on a list of intervals.

INPUTS:

  • outfcn -- where to put the function constructed. We ADD onto outfcn
  • incoef - the coefficients to evaluate. destroyed
  • interval_list-- An array of Local_Trig_Interval's, encoding what intervals to evaluate, how to evaluate, and where to put the result
  • num_intervals - the number of intervals

OUTPUTS: outfcn is filled in. incoef is destroyed

NOTES:

  • evaluates using the dual bell

Here is the call graph for this function:

Here is the caller graph for this function:

void expand_d_intlist ( REAL outcoef,
REAL infcn,
Local_Trig_Interval interval_list,
int  num_intervals 
)

This routine does a LCT/DCT expansion on a list of intervals.

This version expands using the DUAL bell. Otherwise it is just like expand_intlist.

See expand_intlist for a full description.

Here is the call graph for this function:

Here is the caller graph for this function:

void expand_intlist ( REAL outcoef,
REAL infcn,
Local_Trig_Interval interval_list,
int  num_intervals 
)

This routine does a LCT/DCT expansion on a list of intervals.

INPUTS:

  • outcoef -- where to put the coefficients constructed.
  • infcn - the fcn to expand
  • interval_list-- An array of Local_Trig_Interval's, encoding what intervals to expand, how to expand, and where to put the result -num_intervals - the number of intervals

OUTPUTS: outcoef is filled in. infcn is untouched

Here is the call graph for this function:

Here is the caller graph for this function:

void fast1_fromfile ( int *  numpts_ptr,
int *  band_limit_ptr,
REAL **  node_ptr,
REAL **  weight_ptr,
Pm_1D **  pmn_matrix_ptr,
Pm_1D_Workspace pmnwork,
FILE *  fp 
)

This routine loads the initialization dumped by dump_fast1, interprets it and does all the set-up necessary to do the synthesis or analysis transform

INPUTS: fp -- the objects are read from the file pointed to by fp, in binary format. The file must be opened before calling this routine.

OUTPUTS:

  • numpts -- the number of points on the full interval.
  • band_limit -- the bound on the degree of the expansion. In \(P^m_n\) notation this bounds n.
  • node -- an array of length numpts with the nodes, usually Arcsin of the usual gaussian nodes.
  • weight -- an array of length numpts with the quadrature weights
  • pmn_matrix -- an array of band_limit (==order_limit) Pm_1D structures, each representing the compressed matrix for one m.
  • pmn_work -- a structure holding workspace and other precomputed values used by the transform, independent of m

NOTES:

  • node and weight are not needed for the synthesis/analysis routines but are provided here for other uses.

Here is the call graph for this function:

REAL fast1_tofile ( int  numpts,
int  band_limit,
REAL node,
REAL weight,
REAL  epsilon,
FILE *  fp 
)

This routine initializes the fast transform for spherical harmonics (version 1) and dumps it to a file. This version is designed for the novice user, and so doesn't allow tuning.

INPUTS:

  • numpts -- the number of points on the full interval. Must be divisible by 4. More factors of 2 will improve performance.
  • band_limit -- the bound on the degree of the expansion. In \(P^m_n\) notation this bounds n.
  • node -- an array of length numpts with the nodes, usually Arcsin of the usual gaussian nodes.
  • weight -- an array of length numpts with the quadrature weights
  • epsilon -- the level at which to truncate. The error in the transform will be slightly larger than this

OUTPUTS:

  • fp -- the objects created are written onto the file pointed to by fp, in binary format. The file must be opened before calling this routine.
  • returns the compression ratio, which is an upper bound on the speed up factor of using this algorithm

NOTES:

  • node and weight are input so the user can select.
  • we hard-wire the largest number of levels legal. This will slow initialization but is the best thing for novice users
  • The safety factor on the bell may not be quite right
  • we leak a little bit of memory from the trig plans

Here is the call graph for this function:

void fold_e_e_1s ( REAL coef,
REAL fcn,
int  numpts,
REAL left,
int  n_left,
REAL right,
int  n_right,
REAL bell,
int  dir 
)

Computes the folded version of a function, which is then usually DCTed also can unfold

INPUTS:

  • coef -- a pointer to the left edge of the array for the folded function "coefficients"
  • fcn -- a pointer to the left end of the main interval
  • numpts -- the length of the main interval
  • left -- a pointer to the right edge of the interval that sits to the left of the main interval left will be called with negative indices
  • n_left-- the length of left to use. The bell spills this much into left, and the same amount into fcn.
  • right -- a pointer to the left edge of the interval that sits to the right of the main interval
  • n_right-- the length of right to use. The bell spills this much into right, and the same amount into fcn.
  • bell --- an array containing the bell values
  • dir -- which operation to do:
    • 1 : fold
    • -1 : unfold

OUTPUT:

  • if dir==1: coef -- the folded version of the function. length numpts is written on
  • if dir==-1: fcn,left,right -- the unfolded version of the function is ADDED to these arrays
  • else : no action

NOTES:

  • If all the data is contiguous, we will have right=fcn+numpts and left=fcn-1;
  • We could incorporate a skip if the data is transposed.

Here is the caller graph for this function:

void fold_e_o_1s ( REAL coef,
REAL fcn,
int  numpts,
REAL left,
int  n_left,
REAL right,
int  n_right,
REAL bell,
int  dir 
)

Computes the folded version of a function, which is then usually LCTed also can unfold.

INPUTS:

  • coef -- a pointer to the left edge of the array for the folded function "coefficients"
  • fcn -- a pointer to the left end of the main interval
  • numpts -- the length of the main interval
  • left -- a pointer to the right edge of the interval that sits to the left of the main interval. left will be called with negative indices.
  • n_left-- the length of left to use. The bell spills this much into left, and the same amount into fcn.
  • right -- a pointer to the left edge of the interval that sits to the right of the main interval
  • n_right-- the length of right to use. The bell spills this much into right, and the same amount into fcn.
  • bell --- an array containing the bell values
  • dir -- which operation to do:
    • 1 : fold
    • -1 : unfold

OUTPUT:

  • if dir==1: coef -- the folded version of the function. length numpts is written on
  • if dir==-1: fcn,left,right -- the unfolded version of the function is ADDED to these arrays
  • else : no action

NOTES:

  • If all the data is contiguous, we will have right=fcn+numpts and left=fcn-1;
  • We could incorporate a skip if the data is transposed

Here is the caller graph for this function:

This routine frees the memory that was given to a Banded_Matrix by init_bdmat_memory.

INPUT: in -- the Banded_Matrix to free. it must have been initialized by init_bdmat_memory, or we will attempt to free things not gotten by calloc, probaby causing catastrophic failure.

NOTES:

  • if used incorrectly, this routine may yield nasty memory bugs

Here is the caller graph for this function:

This routine frees the memory that was given to a Dense_Matrix by init_dmat_memory.

INPUT: in -- the Dense_Matrix to free. it must have been initialized by init_dmat_memory, or we will attempt to free things not gotten by calloc, probaby causing catastrophic failure.

NOTES:

  • If used incorrectly, this routine may yield nasty memory bugs.

Here is the caller graph for this function:

void ftgaqd ( int *  nlat,
REAL theta,
REAL wts,
REAL work,
int *  lwork,
int *  ierror 
)

This routine emulates gadq in the SPHEREPACK library

INPUTS:

  • nlat -- the number of gaussian colatitudes in the interval (0,pi) (between the two poles). nlat must be greater than zero.
  • work -- a temporary work space NOT USED
  • lwork -- the length of the work space NOT USED

OUTPUTS:

  • theta -- a REAL vector of length nlat containing the nlat gaussian colatitudes on the sphere in increasing radians in the interval (0,pi). Memory must already be allocated
  • wts -- a REAL vector of length nlat containing the nlat gaussian weights.Memory must already be allocated
  • ierror -- given value
    • 0 no errors
    • 2 if nlat<=0

NOTES:

  • the precision to which to compute the gaussian nodes is hard-wired at 1e-15.
  • If I make it ftgaqd_ can I call it from fortran??
  • Where is this routine used??

Here is the call graph for this function:

char* getoption ( const char *  flag,
int  argc,
char **  argv 
)

This routine allows command line options, usually of the form: program -d 256. Normal usage in the program is:

if(getoption("-d",argc,argv)!=NULL) 
   degree=atoi(getoption("-d",argc,argv));

The first call makes sure the option flag was indeed used. If it was, we call again to get the value as a string, then convert it using ato_ .

void give_bdmat_memory ( Banded_Matrix current,
Banded_Matrix previous 
)

This routine sets up memory for the next Banded_Matrix we wish to use.

It looks at the previous Banded_Matrix we used, and increments fields

INPUT: previous -- the last Banded_Matrix we used

OUTPUT: current -- the Banded_Matrix we wish to use next

We set the .band_p, .band_list, and .matrix fields to the next available ints and REAL. .max_size, .max_rows, and .max_bands are set to account for the amount used in previous. also set: .non_zero=0; .n_row=-1 .n_col=0

Here is the caller graph for this function:

void give_dmat_memory ( Dense_Matrix current,
Dense_Matrix previous 
)

This routine sets up memory for the next Dense_Matrix we wish to use.

It looks at the previous Dense_Matrix we used, and increments fields

INPUT: previous -- the last Dense_Matrix we used.

OUTPUT: current -- the Dense_Matrix we wish to use next. We align current->matrix to the first space not used by previous->matrix, and set current->max_size to account for the amount used in previous.

Here is the caller graph for this function:

void give_dvect_memory ( Dense_Vector current,
Dense_Vector previous 
)

This routine sets up memory for the next Dense_Vector we wish to use. It looks at the previous Dense_Vector we used, and increments fields

INPUT: previous -- the last Dense_Vector we used

OUTPUT: current -- the Dense_Vector we wish to use next We align current->vector to the first space not used by previous->vector, and set current->max_size to account for the amount used in previous.

Here is the caller graph for this function:

void gnode_to_Pmninit ( REAL ***  sofar,
REAL **  ex,
REAL **  why,
int **  size,
REAL gnode,
int  hemipts 
)

Converts gnodes to the initialization needed for next_rtsinPmncos.

Takes the place of the initialization step in next_rtsinPmncos(....,-1)

INPUTS:

  • sofar-- a 2xhalfpts matrix used to store intermediate results
  • ex -- an array of length halfpts containing
  • why-- an array of length halfpts containing
  • size--an array of integers of length halfpoints used to store the exponent of the power of 2 removed to stabilize our function values
  • hemipts-- the number of point in [0,PI/2]. We use parity for [PI/2,PI]

OUTPUTS : This function creates the arrays above:

  • ex - will be filled in with cos(gnode[i]).
  • why -- will be filled in with sin(gnode[i]).
void gsearch_to_ilist ( Local_Trig_Interval ilist_out,
int *  intervals_kept,
int  max_intervals,
Local_Trig_Interval ilist_in,
Dyadic_Gsearch_Save dy_gs_save,
int  halfpts 
)

This function converts the choices made by the dyadic_gsearch to a list of intervals.

INPUTS:

  • max_intervals -- the largest number of interval ilist_out can hold
  • ilist_in -- all possible intervals, in Local_Trig_Interval format
  • dy_gs_save -- a pointer to gsearch structure From its best_level and branching fields we extract the partition.
  • halfpts -- the numpts of data points this search covered In the usual case where the grow search is for half the true function interval, this is half the number of points.

OUTPUT:

  • ilist_out -- written onto with the list of interval. Note that we copy the structure Local_Trig_Interval, which includes pointers to other things. We do not copy the arrays they point to
  • intervals_kept -- the number of intervals kept is written here

NOTES:

  • It is possible to run over ilist_in
  • In 2007 fixed bug i=i++ --> i++

Here is the caller graph for this function:

Banded_Matrix init_bdmat_memory ( int  max_size,
int  max_rows,
int  max_bands 
)

This routine gets a large amount of memory for Banded_Matrix's, and gives it to a single Banded_Matrix to hold. This special Banded_Matrix acts as a pointer to this block of memory, and also keeps track of how large this memory is.

INPUT:

  • max_size -- the amount of memory to get for matrix
  • max_rows -- the amount of memory to get for band_p
  • max_bands -- (1/3) the amount of memory to get for band_list

OUPUT: returns a Banded_Matrix with

  • .max_size=max_size
  • .max_rows=max_rows+1
  • .max_bands=max_bands;
  • .non_zero=0;
  • .n_row=-1 This ruse allows give_bdmat_memory to pass along all the memory instead of losing one row
  • .n_col=0
  • .band_p pointing to max_rows+1 ints
  • .band_list pointing to 3*max_bands ints
  • .matrix pointing to max_size REALs

Here is the caller graph for this function:

void init_direct1 ( Pm_Direct_d **  out,
int  halfpts,
int  band_limit,
REAL node 
)

This routine initializes for the transform for spherical harmonics, direct version 1. This version simple precomputes the Pm matrices and stores them in dense matrix format.

INPUTS:

  • halfpts -- half the number of points on the full interval. This implies the full number of points is even, and we also assume the nodes are symmetric. Currrently halfpts must also be even for our trig transforms to work.
  • band_limit -- the bound on the degree of the expansion. In \(P^m_n\) notation this bounds n. For gaussian nodes this must be < 2*halfpts and for other nodes it should be even smaller. We confirn band_limit < 2*halfpts but leave the rest to the user.
  • node -- an array with the nodes, usually Arcsin of the usual gaussian nodes. We use halfpts of these, representing half of the interval.

OUTPUTS: out -- memory is allocated for band_limit (==order_limit) Pm_Direct_d structures, each representing the matrix for one m. Various bits of memory are allocated for these, and values filled in.

Here is the call graph for this function:

Here is the caller graph for this function:

Dense_Matrix init_dmat_memory ( unsigned int  n)

This routine gets a large amount of memory for Dense_Matrix's, and gives it to a single Dense_Matrix to hold. This special Dense_Matrix acts as a pointer to this block of memory, and also keeps track of how large this memory is.

INPUT: n -- the amount of memory to get

OUPUT: returns a Dense_Matrix with

  • .max_size=n
  • .n_row=0
  • .n_col=0
  • .matrix pointing to memory for n REAL's

Here is the caller graph for this function:

Dense_Vector init_dvect_memory ( unsigned int  n)

This routine gets a large amount of memory for Dense_Vector's, and gives it to a single Dense_Vector to hold. This special Dense_Vector acts as a pointer to this block of memory, and also keeps track of how large this memory is.

INPUT: n -- the amount of memory to get

OUPUT: returns a Dense_Vector with

  • .max_size=n
  • .n_row=0
  • .vector pointing to memory for n REAL's

Here is the caller graph for this function:

void init_fast1 ( Pm_1D **  out,
Fast1_Save saves_fordump,
int  halfpts,
int  band_limit,
REAL node,
int *  numlevels,
REAL  epsilon,
REAL(*)(int, int)  bell_choice 
)

This routine initializes for the fast transform for spherical harmonic, version 1.

Version 1 compresses in 1D (each Pmn separately) and chooses a single partition for each m and parity. By choosing a single partition we minimize the trig transform cost, but sacrifice precision. This is appropriate for small problems, near the break-even point, since it reduces overhead.

INPUTS:

  • halfpts -- half the number of points on the full interval. This implies the full number of points is even, and we also assume the nodes are symmetric. Currrently halfpts must also be even for our trig transforms to work.
  • band_limit -- the bound on the degree of the expansion. In \(P^m_n\) notation this bounds n. For gaussian nodes this must be < 2*halfpts and for other nodes it should be even smaller. We confirn band_limit < 2*halfpts but leave the rest to the user.
  • node -- an array with the nodes, usually Arcsin of the usual gaussian nodes. We use halfpts of these, representing half of the interval.
  • bell_choice -- a pointer to the bell function to use. This should be tuned to the desired precision. The choice of bell is subtle, and handled elsewhere.
  • numlevels -- the number of dyadic levels to use when trying to adapt. halfpts must be divisible by 2^numlevels. If numlevels is input too large or negative, this routine will choose the maximum allowed.
  • epsilon -- the target precision. The fast algorithm is an approximate method, tunable to the desired precision. The algorithm is faster the less precision (larger epsilon but < 1) you request. On the other hand, doubling the number of digits slows the algorithm by less than a factor of 2, so asking for a little extra is safe. If epsilon is chosen too close to the round-off error, the algorithm will degenerate to direct matrix applications.

OUTPUTS:

  • out -- memory is allocated for band_limit (==order_limit) Pm_1D structures, each representing the compressed matrix for one m. Various bits of memory are allocated for these, and values filled in.
  • saves_fordump -- bell values and total length are filled in here so we can dump them to file efficiently.
  • numlevels -- is modified to the maximum allowed number if the input number was illegal.

NOTES:

  • halfpts must be even for this routine to function. Performance (speed) of the algorithm will improve if halfpts has more factors of 2. For best performance, the non-power-of-two factor in halfpts should be small (10 or so).
  • output ? total coefficients,bands,rows,intervals ? modified num_levels ?some holder for the trig plans
  • leaks some memory
  • right FFTW flags? import and export wisdom?
  • min_gap hard wired
  • bdmat_raw may not have enough bands

Here is the call graph for this function:

Here is the caller graph for this function:

void init_Pm_1d_workspace ( Pm_1D_Workspace out,
int  numpts,
int  band_limit,
REAL node,
REAL weight 
)

This routine initializes a structure Pmncos_1D_Workspace, which is used to hold workspace and a few precomputed values for use in the synthesis/analysis routines.

INPUTS:

  • numpts -- the number of points in the interval (number of latitudes) used here to determine memory needed.
  • band_limit -- the limit on the degree of the expansion. Used here to determine the maximal number of coefficients possible for one order.
  • node -- an array of numpts REALs specifying the discretization. Usually this will be gaussian nodes.
  • weight -- an array of numpts REALs specifying the quadrature weight. Usually this will be gaussian weight.

OUTPUT: out -- memory is allocated for the fields of out, and some values are precomputed.

NOTE:

  • probably should pack the memory together

Here is the call graph for this function:

Here is the caller graph for this function:

void lctw ( REAL fcn,
Ftrigtw_Plan main_plan 
)

Computes the local cosine transform of fcn and puts the coeficients back into fcn.

The local cosine transform is also known as the type 4 discrete cosine transform.

This is the LCT at half integer points. Is its own inverse.

INPUTS:

  • fcn array containing the data
  • main_plan -- a structure holding precomputed trig functions, the plan used by fftw, and the number of points; initialized by lctw_init.

OUTPUT: fcn. Puts the coefficients in fcn. specifically: \[ f(k) =norm \sum_{j=0}^{numpts-1} f(j) \cos\left(\pi\frac{(k+1/2)(j+1/2)}{numpts}\right) \]

         numpts-1              (k+0.5)  (j+0.5)
  fcn[k]= SUM   fcn[j] cos( PI ----------------  ) *norm
         j=0                         numpts   

set for \(L^2\) normalization.

NOTES:

  • comment better
  • requires even numpts. checked by lctw_init

Here is the caller graph for this function:

void lctw_init ( Ftrigtw_Plan main_plan,
int  flags,
int  numpts 
)

This routine initializes for lctw

We initialize trigonometic things used by the lct and dct portions, and call the initailizer for the fftw real transform (forward).

INPUTS:

  • main_plan -- a structure that will hold the things we generate Memory for its fields is allocated in this routine.
  • flags -- flags passed directly to the rfftw initializer
  • numpts -- the number of points on which we will transform MUST be even

OUTPUTS: main_plan is filled in:

NOTES:

  • comment better

Here is the caller graph for this function:

void load_bin_bdmat ( Banded_Matrix x,
FILE *  in 
)

This routine boads a Banded_Matrix to a file in binary format, as written by dump_bin_bdmat

INPUTS:

  • x -- the Banded_Matrix. Should be initialized as follows
    • .max_rows is the amount of row memory available
    • .band_p points to .max_rows ints
    • .max_bands is the amount of band memory available
    • .band_list points to 3*.max_bands ints
    • .max_size is the amount of main memory available
    • .matrix points to .max_size REAL
  • in -- A pointer to the file to read from. Setting this to stdin makes it read from the screen. If in==NULL we use stdin. The reading mode ("rb") is determined outside of this routine.

OUTPUT: x -- is filled in.

Here is the caller graph for this function:

void load_bin_dmat ( Dense_Matrix x,
FILE *  in 
)

This routine loads a Dense_Matrix from a file in binary format, as written by dump_bin_dmat.

INPUTS:

  • x -- the Dense_Matrix to be filled in. Should be initialized with
    • .max_size indicating the amount of memory available
    • .matrix pointing to max_size REALs
  • in -- A pointer to the file to read from. Setting this to stdin makes it read from the screen. If in==NULL we use stdin. The reading mode ("rb") is determined outside of this routine.

OUTPUT: x -- is filled in.

Here is the caller graph for this function:

void load_direct1 ( int *  numpts_ptr,
int *  band_limit_ptr,
REAL **  node_ptr,
REAL **  weight_ptr,
Pm_Direct_d **  pmn_matrix_ptr,
FILE *  fp 
)

This routine loads the initialization dumped by dump_direct1 We interpret what we read and re-align pointers and such to get things ready to use

INPUTS: fp -- the objects are read from the file pointed to by fp, in binary format. The file must be opened before calling this routine.

OUTPUTS:

  • numpts -- the number of points on the full interval.
  • band_limit -- the bound on the degree of the expansion. In P^m_n notation this bounds n.
  • node -- an array of length numpts with the nodes, usually Arcsin of the usual gaussian nodes.
  • weights -- an array of length numpts with the quadrature weights
  • pmn_matrix -- an array of band_limit (==order_limit) Pm_Direct_d structures, each representing the matrix for one m.

Here is the call graph for this function:

Here is the caller graph for this function:

void load_fast1 ( int *  numpts_ptr,
int *  band_limit_ptr,
int *  num_levels_ptr,
REAL **  node_ptr,
REAL **  weight_ptr,
Fast1_Save saves_fordump,
Pm_1D **  pmn_matrix_ptr,
FILE *  fp 
)

This routine loads the initialization dumped by dump_fast1.

We interpret what we read and re-align pointers and such to get things ready to use.

INPUTS: fp -- the objects are read from the file pointed to by fp, in binary format. The file must be opened before calling this routine

OUTPUTS:

  • numpts -- the number of points on the full interval.
  • band_limit -- the bound on the degree of the expansion. In P^m_n notation this bounds n.
  • num_levels -- the number of dyadic levels used in the adaption
  • node -- an array of length numpts with the nodes, usually Arcsin of the usual gaussian nodes.
  • weights -- an array of length numpts with the quadrature weights
  • saves_fordump -- a structure containing the information needed for the bells. Constructed by init_fast1
  • pmn_matrix -- an array of band_limit (==order_limit) Pm_1D structures, each representing the compressed matrix for one m.

NOTES:

  • num_levels is not needed by the synthesis/analysis routines. It may be useful to keep it so we could re-dump.
  • saves_for dump is also not needed. we could free it here.
  • we free plan_holder, thus losing our handle on the trig plans This may cause memory leaks since we can't free them easily
  • We now have separate workspace for each trig plan, but these could be combined.

Here is the call graph for this function:

Here is the caller graph for this function:

void ltrigint_init ( Local_Trig_Interval out,
int  numpts,
int  left_extra,
int  right_extra,
int  fcn_offset,
int  coef_offset,
REAL(*)(int, int)  bell_choice,
int  which_transform,
int  fftw_flags 
)

This routine initializes a Local_Trig_Interval.

INPUTS:

  • numpts
  • left_extra
  • right_extra
  • fcn_offset
  • coef_offset
  • which_transform -- are copied into the structure
  • bell_choice -- a pointer to the bell function to use
  • fftw_flags -- flags passed to the fftw initializer

OUTPUTS: out -- is written onto with the above info.

  • Memory is obtained for .bell and .dualbell and they are initialized.
  • .transform_plan is initialized for the transform indicated by which_transform.

NOTES:

  • document better

Here is the call graph for this function:

Here is the caller graph for this function:

void mk_gnode_gweight ( REAL gnode,
REAL gweight,
REAL  epsilon,
int  numpts 
)

This routine constructs the gaussian nodes and weights.

We make cosine inverse of the ordinary gaussian nodes.

INPUTS:

  • gnode -- the array of length numpts in which to put the gaussian nodes. Memory must already be allocated.
  • gweight -- the array in which to put the gaussian weights. Memory must already be allocated.
  • epsilon -- the precision to which to compute the gaussian nodes.
  • numpts -- the order of the nodes and weights we compute this is the length of gnode and gweight.

OUTPUTS:

  • gnode -- the numpts gaussian node values are written on gnode We make cosine inverse of the ordinary gaussian nodes.
  • gweight -- numpts values of gweight are filled in

Here is the call graph for this function:

Here is the caller graph for this function:

void next_rtsinPmncos ( REAL outfcn,
Pmncos_Recurrence_Save pmn_save,
int  halfpts,
int  order,
int  index 
)

Do one step in the recurrence process then output the results.

This saves on memory if we are trying to compress all associated Legendre fcns of some order. This version is super stabilized for all conditions.

INPUTS:

  • out-- the results will be put in this array of length halfpts, in format out[i]=C^m_n P^m_n(pmn_save->ex[i])*sqrt(pmn_save->why[i]). Generally pmn_save->ex[i] is cos(quadrature_point[i]) pmn_save->ex[i] is sin(quadrature_point[i])
  • pmn_save -- a structure that holds some precomputed trig values and the results of previous recurrences. Must be initialized with Pmncos_resave_init.
  • halfpts-- the number of points to use. Generally this is the number of points in [0,PI/2] and we use parity for [PI/2,PI]. This routine does not care, however, because the location of the points is encoded in pmn_save.
  • order -- the order of the associated legendre function desired. In P^m_n notation this is m
  • index--- the index desired minus m. In P^m_{m+k} notation this is k.

OUTPUTS : results are output in outfcn, above

USAGE: You must call this function for successive values of index, starting at 0. Order must remain fixed.

NOTES:

  • We pass type REAL to several functions which expect double. This should not cause trouble but still might.
  • Is this the correct norm fix ??

Here is the caller graph for this function:

void Pm_1d_analyse ( REAL outcoef,
Pm_1D inmat,
REAL infcn,
Pm_1D_Workspace pmnwork 
)

Expands (analyses) a function into spherical harmonic coefficients.

This routine only does one order (m) so it is really only an Associaded Legendre Function analyser.

INPUTS:

  • inmat -- The precomuted matrix of Associated Legendre functions of the desired order in compressed Pm_1D format
  • infcn -- an array of REALs to hold the function to expand. Its length is encoded in the number of columns in inmat.
  • pmnwork -- a structure that holds workspace and a few global precomputed values.

OUTPUTS: outcoef -- an array of the coefficients computed. The information on how many of these to compute is encoded by the number of rows in inmat.

NOTES:

  • We could check that (numbas[0]==numbas[1])||(numbas[0]==numbas[1]+1)
  • ?UNROLL the parity LOOP ONCE DEBUGGED?

Here is the call graph for this function:

void Pm_1d_synthesize ( REAL outfcn,
Pm_1D inmat,
REAL incoef,
Pm_1D_Workspace pmnwork 
)

Evaluates (synthesizes) a function from spherical harmonic coefficients.

This routine only does one order (m) so it is really only an Associaded Legendre Function synthesizer.

INPUTS:

  • inmat -- The precomuted matrix of Associated Legendre functions of the desired order in compressed Pm_1D format
  • incoef -- an array of the coefficients to use. The information on how many of these to use is encoded by the number of rows in inmat.
  • pmnwork -- a structure that holds workspace and a few global precomputed values

OUTPUTS: outfcn -- an array of REALs to hold the computed function. Its length is encoded in the number of columns in inmat.

NOTES:

  • we could check that (numbas[0]==numbas[1])||(numbas[0]==numbas[1]+1)

Here is the call graph for this function:

void Pm_direct1_analyse ( REAL outcoef,
Pm_Direct_d inmat,
REAL infcn,
Pm_1D_Workspace pmnwork 
)

Expand (analyses) a function into spherical harmonic coefficients.

This routine only does one order (m) so it is really only an Associated Legendre Function analyser.

INPUTS:

  • inmat -- The precomuted matrix of Associated Legendre functions of the desired order in Pm_Direct_d format
  • infcn -- an array of REALs to hold the function to expand. Its length is encoded in the number of columns in inmat
  • pmnwork -- a structure that holds workspace and a few global precomputed values

OUTPUTS: outcoef -- an array of the coefficients computed. The information on how many of these to compute is encoded by the number of rows in inmat.

NOTES:

  • We could check that (numbas[0]==numbas[1])||(numbas[0]==numbas[1]+1)
  • ?UNROLL the parity LOOP ONCE DEBUGGED?

Here is the call graph for this function:

void Pm_direct1_synthesize ( REAL outfcn,
Pm_Direct_d inmat,
REAL incoef,
Pm_1D_Workspace pmnwork 
)

Evaluates (synthesizes) a function from spherical harmonic coefficients.

This routine only does one order (m) so it is really only an Associaded Legendre Function synthesizer.

INPUTS:

  • inmat -- The precomuted matrix of Associated Legendre functions of the desired order in Pm_Direct_d format
  • incoef -- an array of the coefficients to use. The information on how many of these to use is encoded by the number of rows in inmat.
  • pmnwork -- a structure that holds workspace and a few global precomputed values

OUTPUTS: outfcn -- an array of REALs to hold the computed function. Its length is encoded in the number of columns in inmat

NOTES:

  • We could check that (numbas[0]==numbas[1])||(numbas[0]==numbas[1]+1)

Here is the call graph for this function:

REAL Pmncos ( int  order,
int  index,
REAL  phi 
)

Evaluate an associated Legendre function P^m_n at any point.

This routine is super stabilized for all conditions.

INPUTS:

  • order -- the order of the associated legendre function you want. In P^m_n notation this is m. order>=0
  • index -- Which frequency to use, starting from the lowest legal. In P^m_n notation, n is m+index. index >=0
  • phi -- The point at which to evaluate. 0< phi < PI

OUTPUTS: returns \(cP^m_n(\cos(\phi))\) with c to make L^2 normalized.

                                m
                               P (cos(phi)) * const
                                n

The constant is set so we are L^2 normalized. Since the number of points to be used is not known, we cannot adjust for the discretization.

NOTES:

  • We pass type REAL to several functions which expect double. This should not cause trouble but still might.
  • Just like dbrtsinPmncos except for sqrt(sin) factor.

destroy (free) a structure Pmncos_Recurrence_Save

INPUTS: pmn_save -- the structure to free

OUTPUTS : The memory held by pmn_save is freed

Here is the caller graph for this function:

void Pmncos_resave_init ( Pmncos_Recurrence_Save pmn_save,
REAL node,
int  numpts 
)

Converts a set of nodes (often gaussian) to the initialization needed for the associated legendre function recurrence.

INPUTS:

  • pmn_save -- a structure to hold the outputs
  • node -- and array holding the sampling nodes. The array is of length (numpts+1)/2 and the values should be in [0,PI/2]. We assume the sampling on (PI/2,PI] is symmetric
  • numpts -- the number of sample points. We use half of these (plus 1 if odd)

OUTPUTS : This function creates memory for the arrays in pmn_save and initializes some values.

Here is the caller graph for this function:

void pointwise_multiply ( REAL out,
REAL in_1,
REAL in_2,
int  numpts 
)

Pointwise multiply two vectors to form the third.

INPUTS:

  • out -- a vector of length numpts in which to put the output. May be the same as in_1 or in_2
  • in_1 -- the first input vector, of length numpts
  • in_2 -- the second input vector, of length numpts
  • numpts -- the length of the vectors

OUTPUTS: out -- is written onto; out[i]=in_1[i]*in_2[i];

Here is the caller graph for this function:

void reflect_evenodd ( REAL even,
REAL odd,
REAL whole,
int  numpts,
int  dir 
)

Form the even and odd parts of the input vector.

It can also reverse the process.

INPUTS:

  • even -- In forward mode, the even portion will be put here. In reverse mode, the even portion is read from here. Of length (numpts+1)/2.
  • odd -- In forward mode, the odd portion will be put here. In reverse mode, the odd portion is read from here. Of length numpts/2.
  • whole -- in forward mode, the fcn to read from. In reverse mode, where to put the function. Of length numpts.
  • numpts -- the length of whole and twice the length of even and odd. If numpts is odd, the extra point goes to even.
  • dir -- the direction to go in
    • dir==1 fold whole into even/odd
    • dir==-1 unfold even/odd into whole
    • else error

OUTPUTS:

  • In forward mode even and out are written onto, with the even/odd reflections of whole:

    • even[i]=whole[i]+whole[numpts-1-i]
    • odd[i]=whole[i]-whole[numpts-1-i]

    If the number of points is odd, the center point is copied into even, and omitted from odd.

  • in reverse mode undoes this process

NOTES:

  • the exception handling for odd numpts might slow this down

Here is the caller graph for this function:

void remove_rootsin ( REAL out,
REAL in,
REAL node,
int  numpts 
)

Divide the vector in by sqrt(sin( node values)).

INPUTS:

  • out -- a pointer to where to put the outputs. May be the same as in.
  • in -- the input zector, of length numpts
  • node -- and array holding the sampling nodes, corresponding to numpts physical points. Should be in (0,PI)
  • numpts -- the number of sample points. (plus 1 if odd)

OUTPUTS : out -- is written onto out[i]=in[i]/sqrt(sin(node[i]))

NOTES:

  • For safety we do sqrt(fabs(sin(node[i]))) This is so if the node value is out of range the routine doesn't crash
  • We could still crash by dividing be zero

Here is the caller graph for this function:

void rfftw_to_NR_format ( REAL data_nr,
REAL data_rfftw,
int  numpts,
int  dir 
)

Translates between the formats for the real discrete fourier transform used by the rfftw package and the Numerical Recipies routines. This helps allow the NR DCT routine to call the rfftw.

INPUTS:

  • data_nr -- The data in the nr format: real[0],real[n/2],real[1],imag[1]... ...real[n/2-1],imag[n/2-1]
  • data_rfftw--The data in the rfftw format: real[0],real[1],...real[n/2], imag[(n+1)/2-1],....,imag[1]
  • numpts -- the length n of the vectors; MUST be even
  • dir
    • 1 for rfftw -> nr
    • -1 for nr -> rfftw

OUTPUTS:

  • if dir==1, data_nr is written onto
  • if dir==-1, data_rfftw is written onto

NOTES:

  • we must have REAL==fftw_real
  • there is a sign convention difference on the imaginary parts realfft_pre(,,,,1); versus rfftw_one(rw_plan_forward,,);

Here is the caller graph for this function:

double second ( )

return the current time for performance timings