Header for a library for doing the Fast Transform for Spherical Harmonics. More...

#include <math.h>
#include <stdlib.h>
#include <stdio.h>
#include <assert.h>
#include <string.h>
#include <time.h>
#include "fftw.h"
#include "rfftw.h"

Data Structures
struct	Ftrigtw_Plan
	Holder for precomputed FFTW objects. More...
struct	Dense_Matrix
	An ordinary (one dimensional) matrix. More...
struct	Dense_Vector
	An ordinary (one dimensional) vector. More...
struct	Banded_Matrix
	A one dimensional sparse matrix, with banded structure. More...
struct	Bdmat_Op_Workspace
	Workspace for Banded_Matrix routines. More...
struct	Dyadic_Gsearch_Save
	Workspace for "best non-decreasing dyadic partition" search. More...
struct	Fast1_Save
	Extra things to save for fast1 algorithm. More...
struct	Local_Trig_Interval
	Information to do a local trigonometric transform on an interval. More...
struct	Pm_1D
	Hold a compressed matrix of Associate Legendre functions of a fixed order. More...
struct	Pm_Direct_bd
	Hold a banded matrix of Associate Legendre functions of a fixed order. More...
struct	Pm_Direct_d
	Hold a dense matrix of Associate Legendre functions of a fixed order. More...
struct	Pm_1D_Workspace
	Hold workspace and a few precomputed values for the fast synthesis/analysis. More...
struct	Pmncos_Recurrence_Save
	Hold data and workspace to do the three-term recurrence for a vector of sample points. More...
Defines
#define	PI 3.14159265358979323846264338328
#define	USE_double 1
#define	REAL double
Functions
Dense_Vector	init_dvect_memory (unsigned int n)
void	give_dvect_memory (Dense_Vector current, Dense_Vector previous)
Dense_Matrix	init_dmat_memory (unsigned int n)
void	give_dmat_memory (Dense_Matrix current, Dense_Matrix previous)
void	free_dmat_memory (Dense_Matrix *in)
void	dump_bin_dmat (Dense_Matrix x, FILE out)
void	load_bin_dmat (Dense_Matrix x, FILE in)
void	dmat_dvect_multiply (Dense_Vector out, Dense_Matrix inmat, Dense_Vector *invect)
void	dmat_t_dvect_multiply (Dense_Vector out, Dense_Matrix inmat, Dense_Vector *invect)
Banded_Matrix	init_bdmat_memory (int max_size, int max_rows, int max_bands)
void	give_bdmat_memory (Banded_Matrix current, Banded_Matrix previous)
void	free_bdmat_memory (Banded_Matrix *in)
void	bdmat_copy (Banded_Matrix out, Banded_Matrix in)
void	dump_bin_bdmat (Banded_Matrix x, FILE out)
void	load_bin_bdmat (Banded_Matrix x, FILE in)
void	bdmat_dvect_multiply (Dense_Vector out, Banded_Matrix inmat, Dense_Vector *invect)
void	bdmat_t_dvect_multiply (Dense_Vector out, Banded_Matrix inmat, Dense_Vector *invect)
void	dmat_to_bdmat (Banded_Matrix out, Dense_Matrix in, REAL cutoff, int min_gap)
int	dyadic_gsearch (Dyadic_Gsearch_Save *dy_gs_save)
void	dyadic_gsearch_init (Dyadic_Gsearch_Save *dy_gs_save, int num_levels)
void	dyadic_gsearch_free (Dyadic_Gsearch_Save *dy_gs_save)
void	fast1_fromfile (int numpts_ptr, int band_limit_ptr, REAL node_ptr, REAL weight_ptr, Pm_1D *pmn_matrix_ptr, Pm_1D_Workspace pmnwork, FILE *fp)
void	load_fast1 (int numpts_ptr, int band_limit_ptr, int num_levels_ptr, REAL node_ptr, REAL weight_ptr, Fast1_Save saves_fordump, Pm_1D *pmn_matrix_ptr, FILE fp)
void	direct1_fromfile (int numpts_ptr, int band_limit_ptr, REAL node_ptr, REAL weight_ptr, Pm_Direct_d *pmn_matrix_ptr, Pm_1D_Workspace pmnwork, FILE *fp)
void	load_direct1 (int numpts_ptr, int band_limit_ptr, REAL node_ptr, REAL weight_ptr, Pm_Direct_d *pmn_matrix_ptr, FILE fp)
void	dctw (REAL fcn, Ftrigtw_Plan main_plan, int dir)
void	dctw_init (Ftrigtw_Plan *main_plan, int flags, int numpts)
void	rfftw_to_NR_format (REAL data_nr, REAL data_rfftw, int numpts, int dir)
void	lctw (REAL fcn, Ftrigtw_Plan main_plan)
void	lctw_init (Ftrigtw_Plan *main_plan, int flags, int numpts)
REAL	dualabell (REAL(*origbell)(int, int), int laplength, int t)
void	bell_1size_init (REAL bellout, REAL(whichbell)(int, int), int lapleft, int center, int lapright, int dual_flag)
void	fold_e_e_1s (REAL coef, REAL fcn, int numpts, REAL left, int n_left, REAL right, int n_right, REAL *bell, int dir)
void	fold_e_o_1s (REAL coef, REAL fcn, int numpts, REAL left, int n_left, REAL right, int n_right, REAL *bell, int dir)
REAL	Pmncos (int order, int index, REAL phi)
void	Pmncos_resave_init (Pmncos_Recurrence_Save pmn_save, REAL node, int numpts)
void	next_rtsinPmncos (REAL outfcn, Pmncos_Recurrence_Save pmn_save, int halfpts, int order, int index)
void	Pmncos_resave_destroy (Pmncos_Recurrence_Save *pmn_save)
void	direct1_tofile (int numpts, int band_limit, REAL node, REAL weight, FILE *fp)
void	init_direct1 (Pm_Direct_d *out, int halfpts, int band_limit, REAL node)
void	dump_direct1 (int numpts, int band_limit, REAL node, REAL weight, Pm_Direct_d pmn_matrix, FILE fp)
REAL	fast1_tofile (int numpts, int band_limit, REAL node, REAL weight, REAL epsilon, FILE *fp)
void	init_fast1 (Pm_1D *out, Fast1_Save saves_fordump, int halfpts, int band_limit, REAL node, int numlevels, REAL epsilon, REAL(*bell_choice)(int, int))
void	dump_fast1 (int numpts, int band_limit, int num_levels, REAL node, REAL weight, Fast1_Save saves_fordump, Pm_1D pmn_matrix, FILE *fp)
void	Pm_direct1_analyse (REAL outcoef, Pm_Direct_d inmat, REAL infcn, Pm_1D_Workspace pmnwork)
void	Pm_direct1_synthesize (REAL outfcn, Pm_Direct_d inmat, REAL incoef, Pm_1D_Workspace pmnwork)
REAL	bellmat1 (int laplength, int t)
REAL	bellmat2 (int laplength, int t)
REAL	bellmat3 (int laplength, int t)
REAL	bellmat4 (int laplength, int t)
REAL	bellmat5 (int laplength, int t)
REAL	bellmat6 (int laplength, int t)
REAL	bellmat7 (int laplength, int t)
REAL	bellmat8 (int laplength, int t)
REAL	bellmat9 (int laplength, int t)
REAL	bellmat10 (int laplength, int t)
REAL	bellmat11 (int laplength, int t)
REAL	bellmat12 (int laplength, int t)
REAL	bellmat13 (int laplength, int t)
REAL	bellmat14 (int laplength, int t)
REAL	bellmat15 (int laplength, int t)
REAL	bellmat16 (int laplength, int t)
REAL	bellmat17 (int laplength, int t)
REAL	bellmat18 (int laplength, int t)
REAL	bellmat19 (int laplength, int t)
REAL	bellmat20 (int laplength, int t)
void	Pm_1d_analyse (REAL outcoef, Pm_1D inmat, REAL infcn, Pm_1D_Workspace pmnwork)
void	Pm_1d_synthesize (REAL outfcn, Pm_1D inmat, REAL incoef, Pm_1D_Workspace pmnwork)
void	mk_gnode_gweight (REAL gnode, REAL gweight, REAL epsilon, int numpts)
void	mk_cosi_gnode (REAL *gnode, REAL epsilon, int numpts)
void	cosi_gnode_to_gweight (REAL gweight, REAL gnode, int numpts)
double	second ()
char *	getoption (const char flag, int argc, char *argv)
void	pointwise_multiply (REAL out, REAL in_1, REAL *in_2, int numpts)
void	reflect_evenodd (REAL even, REAL odd, REAL *whole, int numpts, int dir)
void	remove_rootsin (REAL out, REAL in, REAL *node, int numpts)
void	dyadic_g_ilist_init (Local_Trig_Interval ilist[2], int ints_used, int coefs_used, REAL bell_handle, int bell_length, int *bell_offsets, int halfpts, int num_levels, REAL(bell_choice)(int, int), int flags)
void	ltrigint_init (Local_Trig_Interval out, int numpts, int left_extra, int right_extra, int fcn_offset, int coef_offset, REAL(bell_choice)(int, int), int which_transform, int fftw_flags)
void	gsearch_to_ilist (Local_Trig_Interval ilist_out, int intervals_kept, int max_intervals, Local_Trig_Interval ilist_in, Dyadic_Gsearch_Save dy_gs_save, int halfpts)
void	evaluate_intlist (REAL outfcn, REAL incoef, Local_Trig_Interval *interval_list, int num_intervals)
void	expand_d_intlist (REAL outcoef, REAL infcn, Local_Trig_Interval *interval_list, int num_intervals)
void	expand_intlist (REAL outcoef, REAL infcn, Local_Trig_Interval *interval_list, int num_intervals)
int	count_thresh (REAL *lambda, int howmany, REAL thresh)
void	init_Pm_1d_workspace (Pm_1D_Workspace out, int numpts, int band_limit, REAL node, REAL *weight)
void	ftgaqd (int nlat, REAL theta, REAL wts, REAL work, int lwork, int ierror)
void	gnode_to_Pmninit (REAL *sofar, REAL ex, REAL why, int size, REAL *gnode, int hemipts)

Detailed Description

Header for a library for doing the Fast Transform for Spherical Harmonics.

Define Documentation

#define REAL double

REAL is the chosen floating-point data type (float or double)

#define USE_double 1

USE_double sould be defined by a compiler flag

0 for float
1 for double

Function Documentation

void bdmat_copy	(	Banded_Matrix *	out,
		Banded_Matrix *	in
	)

This routine copies a Banded_Matrix.

INPUTS: in -- the Banded_Matrix to copy

OUTPUT: out -- the values of in are copied to out Should be initialized as follows

.max_rows is the amount of row memory available
.band_p points to .max_rows ints
.max_bands is the amount of band memory available
.band_list points to 3*.max_bands ints
.max_size is the amount of main memory available
.matrix points to .max_size REAL

Here is the caller graph for this function:

void bdmat_dvect_multiply	(	Dense_Vector *	out,
		Banded_Matrix *	inmat,
		Dense_Vector *	invect
	)

This routine applies a Banded_Matrix to a Dense_Vector to produce a Dense_Vector. The matrix may be square or rectangular.

INPUTS:

out -- the Dense_Vector to put the result in. Should be initialized as follows:
- .max_size is the amount of memory available.
- .vector points to .max_size REAL
inmat -- the Banded_Matrix.
invect -- the Dense_Vector

OUTPUT: out is modified so out = inmat * invect

NOTES:

might want to hold some things as local registers, rather than repeatedly accessing arrays.
looping structure may be inefficient

Here is the caller graph for this function:

void bdmat_t_dvect_multiply	(	Dense_Vector *	out,
		Banded_Matrix *	inmat,
		Dense_Vector *	invect
	)

This routine applies the TRANSPOSE of a Banded_Matrix to a Dense_Vector to produce a Dense_Vector. The matrix may be square or rectangular.

INPUTS:

out -- the Dense_Vector to put the result in. Should be initialized as follows
- .max_size is the amount of memory available
- .vector points to .max_size REAL
inmat -- the Banded_Matrix.
invect -- the Dense_Vector

OUTPUT: out is modified so out = TRANSPOSE(inmat) * invect

NOTES:

might want to hold some things as local registers, rather than repeatedly accessing arrays
looping structure may be inefficient

Here is the caller graph for this function:

void bell_1size_init	(	REAL *	bellout,
		REAL(*)(int, int)	whichbell,
		int	lapleft,
		int	center,
		int	lapright,
		int	dual_flag
	)

Computes bell values for a given size and overlap configuration.

INPUTS:

whichbell -- a pointer to a bell function. The bells have the form bell(laplength,t) with
- laplength -- the number of POINTS the bell spills out of the interval. It also spills in an equal number of points. Thus the bell takes 2*laplength points to rise from 0 to 1.
- t -- Which point you want to evaluate the bell at. This should range from 0 to 2*laplength-1. bell(laplength,0)=~0 then it rises until bell(laplength,2*laplength-1)=~1 The edge of the interval is between points, at t=(laplength-1) +0.5 The bell function returns a REAL which is the bell's value at that point.
lapleft -- the laplength to use at the left edge
center -- the main interval length. must be at least lapleft+lapright. If it is longer than this, the bell is given value 1 on the extra portion
lapright-- the laplength to use at the right edge
dual_flag -- a flag for if we should make the dual of the input bell. Use for inverting bi-orthogonal bells
- 0 (or !=1) -- normal, don't dual
- 1 -- make the dual

OUTPUT: bellout --is written onto. The first lapleft+center+lapright entries are used.

Generally, the bell will look like

                               ---------------------\               
                         _____/                      -\______       
           _________/----                                    - ___  
    ___/---                                                        \-
/--                                                                 \
|---------------|--------------|--------------------|--------|--------|
   lapleft      | lapleft                            lapright|lapright
                |- - - - - - - - - center - - - - - - - - - -|

NOTES:

It is legal to have lapleft and or lapright equal to zero
We could call memory internally, but we allow the user to try to localize memory.

Here is the call graph for this function:

Here is the caller graph for this function:

void cosi_gnode_to_gweight	(	REAL *	gweight,
		REAL *	gnode,
		int	numpts
	)

This routine computes the gaussian weights that go with a set of gaussian nodes.

The nodes input here are $\cos^{-1}(\text{the usual nodes} )$.

INPUTS:

gweight -- the array in which to put the gaussian weights. Memory must already be allocated.
gnode -- the gausian nodes, theta values between 0 and PI This is $\cos^{-1}(\text{the usual nodes} )$.
numpts -- the order of the weights we compute this is the length of gnode and gweight.

OUTPUTS: numpts values of gweight are filled in

NOTES:

We use the Christoffel function method, mainly because we can avoid derivatives.
Do I use normalized Pmn?

Here is the caller graph for this function:

int count_thresh	(	REAL *	lambda,
		int	howmany,
		REAL	thresh
	)

This function counts how many members of lambda are above thresh in absolute value.

INPUTS:

lambda-- an array of REALs of length howmany
howmany-- the length of lambda
thresh -- the threshhold used in the cutoff

OUTPUT: -- returns the count

void dctw	(	REAL *	fcn,
		Ftrigtw_Plan *	main_plan,
		int	dir
	)

Computes the disrete cosine transform of fcn and puts the coefficients back into fcn

This is the DCT at half integer points.

INPUTS:

fcn -- array containing the data
main_plan -- a structure holding precomputed trig functions, the plan used by fftw,and the number of points initialized by dctw_init
dir -- the direction to transform. 1 is expansion and -1 evaluation.

OUTPUT: fcn.

In forward mode (dir==1), puts the coefficients in fcn. \[f(k) = norm \sum_{j=0}^{numpts-1} f(j) \cos\left(\pi\frac{k(j+1/2)}{numpts}\right) \] set for $L^2$ normalization.

     numpts-1              k  (j+0.5)
fcn[k]= SUM   fcn[j] cos( PI ----------------  ) *norm
       j=0                         numpts   
       set for L^2 normalization.

In reverse mode inverts this process.

NOTES:

comment better
requires even numpts. checked by dctw_init
precompute sqrt(2.0)?
slow and awkward

Here is the call graph for this function:

Here is the caller graph for this function:

void dctw_init	(	Ftrigtw_Plan *	main_plan,
		int	flags,
		int	numpts
	)

This routine initializes for dctw

We initialize trigonometic things used by the lct and dct portions,
and call the initailizer for the fftw real transform (forward).

INPUTS: 
- main_plan -- a structure that will hold the things we generate
  Memory for its fields is allocated in this routine.
- flags -- flags passed directly to the rfftw initializer
- numpts -- the number of points on which we will transform
  MUST be even

OUTPUTS: main_plan is filled in:

NOTES:
- comment better

keep these always as double so we don't lose precision

Here is the caller graph for this function:

void direct1_fromfile	(	int *	numpts_ptr,
		int *	band_limit_ptr,
		REAL **	node_ptr,
		REAL **	weight_ptr,
		Pm_Direct_d **	pmn_matrix_ptr,
		Pm_1D_Workspace *	pmnwork,
		FILE *	fp
	)

This routine loads the initialization dumped by dump_direct1, interprets it and does all the set-up necessary to do the synthesis or analysis transform.

INPUTS: fp -- the objects are read from the file pointed to by fp, in binary format. The file must be opened before calling this routine.

OUTPUTS:

numpts -- the number of points on the full interval.
band_limit -- the bound on the degree of the expansion. In $P^m_n$ notation this bounds n.
node -- an array of length numpts with the nodes, usually Arcsin of the usual gaussian nodes.
weight -- an array of length numpts with the quadrature weights
pmn_matrix -- an array of band_limit (==order_limit) Pm_Direct_d structures, each representing the matrix for one m.
pmn_work -- a structure holding workspace and other precomputed values used by the transform, independent of m

NOTES:

node and weight are not needed for the synthesis/analysis routines but are provided here for other uses.

Here is the call graph for this function:

void direct1_tofile	(	int	numpts,
		int	band_limit,
		REAL *	node,
		REAL *	weight,
		FILE *	fp
	)

This routine initializes the direct transform for spherical harmonics (version 1) and dumps it to a file.

INPUTS:

numpts -- the number of points on the full interval. Must be divisible by 2.
band_limit -- the bound on the degree of the expansion. In $P^m_n$ notation this bounds n.
node -- an array of length numpts with the nodes, usually Arcsin of the usual gaussian nodes.
weight -- an array of length numpts with the quadrature weights

OUTPUTS: fp - the objects created are written onto the file pointed to by fp, in binary format. The file must be opened before calling this routine.

NOTES:

node and weight are input so the user can select.

Here is the call graph for this function:

void dmat_dvect_multiply	(	Dense_Vector *	out,
		Dense_Matrix *	inmat,
		Dense_Vector *	invect
	)

Apply a matrix to a vector to get a new vector. The matrix is in Dense_Matrix format, and the vectors in Dense_Vector format.

INPUTS:

out -- the Dense_Vector to put the result in. Should be initialized as follows:
- .max_size is the amount of memory available.
- .vector points to .max_size REAL
invect -- an input vector in Dense_Vector format
inmat -- the input matrix in Dense_Matrix format

OUTPUT: out -- inmat * invect = out

NOTES:

ordering of loops not optimized
? use registers for n_row n_col?
blocking could help speed

Here is the caller graph for this function:

void dmat_t_dvect_multiply	(	Dense_Vector *	out,
		Dense_Matrix *	inmat,
		Dense_Vector *	invect
	)

Apply the TRANSPOSE of a matrix to a vector to get a new vector. The matrix is in Dense_Matrix format, and the vectors in Dense_Vector format.

INPUTS:

out -- the Dense_Vector to put the result in. Should be initialized as follows:
- .max_size is the amount of memory available.
- .vector points to .max_size REAL
invect -- an input vector in Dense_Vector format
inmat -- the input matrix in Dense_Matrix format

OUTPUT: out -- TRANSPOSE(inmat) * invect = out

NOTES:

ordering of loops not optimized
blocking may improve speed

Here is the caller graph for this function:

void dmat_to_bdmat	(	Banded_Matrix *	out,
		Dense_Matrix *	in,
		REAL	cutoff,
		int	min_gap
	)

Convert a matrix in Dense_Matrix form to Banded_Matrix form.

INPUT:

out -- Where to put the result in Banded_Matrix format. Should be initialized as follows
- .max_rows is the amount of row memory available
- .band_p points to .max_rows ints +.max_bands is the amount of band memory available
- .band_list points to 3*.max_bands ints
- .max_size is the amount of main memory available
- .matrix points to .max_size REAL
in -- the Dense_Matrix
cutoff -- the truncation threshold to use in the conversion. Entries less than cutoff are discarded. To disable truncation, set cutoff <0 .
min_gap -- the minimal allowed gap between bands. Coefficients less than cutoff are retained if removing them makes a gap of less than min_gap. To disable, set to 0.

OUTPUT: out

NOTES:

not checking for sufficient memory

Here is the caller graph for this function:

REAL dualabell	(	REAL(*)(int, int)	origbell,
		int	laplength,
		int	t
	)

Return the value of the dual to the given bell.

INPUTS:

origbell - pointer to the original bell, e.g. bellmat5.
laplength-- the number of POINTS the bell spills out of the interval. It also spills in an equal number of points. Thus the bell takes 2*laplength points to rise from 0 to 1.
t -- Which point you want to evaluate the bell at. This should range from 0 to 2*laplength-1. bell(laplength,0)=~0 then it rises until bell(laplength,2*laplength-1)=~1. The edge of the interval is between points, at t=(laplength-1) +0.5

OUTPUT: return the REAL value at that point.

Here is the caller graph for this function:

void dump_bin_bdmat	(	Banded_Matrix *	x,
		FILE *	out
	)

This routine dumps a Banded_Matrix to a file in binary format.

INPUTS:

x -- the Banded_Matrix
out -- A pointer to the file to write into. Setting this to stdout makes it write to the screen. If out==NULL we use stdout.

OUTPUT: writes into the file given by out. The writing mode ("wb" or "ab") is determined outside of this routine.

Here is the caller graph for this function:

void dump_bin_dmat	(	Dense_Matrix *	x,
		FILE *	out
	)

This routine dumps a Dense_Matrix to a file in binary format.

This is efficient, but not human readable.

INPUTS:

x -- the Dense_Matrix
out -- A pointer to the file to write into. Setting this to stdout makes it write to the screen. If out==NULL we use stdout.

OUTPUT: writes into the file given by out. The writing mode ("wb" or "ab") is determined outside of this routine.

Here is the caller graph for this function:

void dump_direct1	(	int	numpts,
		int	band_limit,
		REAL *	node,
		REAL *	weight,
		Pm_Direct_d *	pmn_matrix,
		FILE *	fp
	)

This routine dumps the initialization constructed by init_direct1 and general parameters for the direct transform for spherical harmonics, dense version.

INPUTS:

numpts -- the number of points on the full interval.
band_limit -- the bound on the degree of the expansion. In $P^m_n$ notation this bounds n.
node -- an array of length numpts with the nodes, usually Arcsin of the usual gaussian nodes.
weights -- an array of length numpts with the quadrature weights
pmn_matrix -- an array of band_limit (==order_limit) Pm_Direct_d structures, each representing the matrix for one m.

OUTPUTS: fp -- the objects above are written into the file pointed to by fp, in binary format. The file must be opened before calling this routine.

Here is the call graph for this function:

Here is the caller graph for this function:

void dump_fast1	(	int	numpts,
		int	band_limit,
		int	num_levels,
		REAL *	node,
		REAL *	weight,
		Fast1_Save *	saves_fordump,
		Pm_1D *	pmn_matrix,
		FILE *	fp
	)

This routine dumps the initialization constructed by init_fast1 and general parameters for the fast transform for spherical harmonics, version 1.

INPUTS:

numpts -- the number of points on the full interval.
band_limit -- the bound on the degree of the expansion. In $P^m_n$ notation this bounds n.
num_levels -- the number of dyadic levels used in the adaption
node -- an array of length numpts with the nodes, usually Arcsin of the usual gaussian nodes.
weights -- an array of length numpts with the quadrature weights
saves_fordump -- a structure containing the information needed for the bells. Constructed by init_fast1
pmn_matrix -- an array of band_limit (==order_limit) Pm_1D structures, each representing the compressed matrix for one m.

OUTPUTS: fp -- the objects above are written into the file pointed to by fp, in binary format. The file must be opened before calling this routine.

Here is the call graph for this function:

Here is the caller graph for this function:

void dyadic_g_ilist_init	(	Local_Trig_Interval *	ilist[2],
		int *	ints_used,
		int *	coefs_used,
		REAL **	bell_handle,
		int *	bell_length,
		int **	bell_offsets,
		int	halfpts,
		int	num_levels,
		REAL(*)(int, int)	bell_choice,
		int	flags
	)

This routine constructs the interval list for a dyadic partition suitable for the growing interval size search (gsearch).

INPUTS:

ilist -- an array of 2 pointers to Local_Trig_Interval, to be filled in.
halfpts -- the number of points on which to construct these intervals. For the growing search, this usually represents half of the number of points in the full interval.
num_levels -- the number of dyadic levels to use. We need halfpts >= (1<<num_levels) and halfpts % (1<<num_levels) == 0
bell_choice-- the bell function to use
flags -- integer flags passed to the fftw initializer

OUTPUTs:

ilist[0/1] -- has memory allocated and is filled in
- ilist[0] -- gets intervals with trig functions for even parity at the right edge
- ilist[1] -- gets intervals with trig functions for odd parity at the right edge
ints_used -- is filled in with the length of the ilist's
coefs_used -- is filled in with the total number of possible coefficients this ilist can produced. This is used to allocate memory and set offsets within whatever array will eventually hold these coefficients
bell_handle -- is pointed to an array which contains all the bell values. This is used when we want to save the bells to disc efficiently.
bell_length -- is set to the length of bell_handle.
bell_offsets -- is pointed to an array containing integer offsets withing bell_handle, indicating the location of particular bells.

NOTES:

coefs_used includes offsets for the left edge asymmetric bells, which are not used.
We now use the bell memory efficiently, with no repeated computations. For the trig transforms, we do not repeat the trig value or fftw computations. There is an inefficiency in that we have multiple versions of the workspace. This adds trivial memory, but may have cache implications
There was a mysterious segmentation fault possibly related to free(plan_holder) so that is now disabled. This may cause a memory leak.

set which_transform

Here is the call graph for this function:

Here is the caller graph for this function:

int dyadic_gsearch ( Dyadic_Gsearch_Save * dy_gs_save )

This routine implements a `best non-decreasing dyadic partition' search. It finds the partition with lowest cost under the restriction that an interval's right neighbor is either the same size or one size (2x) larger. Generally this is to be used with local cosine. When an interval's left neighbor is the same size, we use a regular full sized bell and put the cost of this expansion in .cost_full[][]. When the left neighbor is smaller we shrink the left edge of the bell, making it asymmetric and yielding .cost_asym[][].

The search tree is grown from the left, with an interval having children its allowed right neighbors. The cost of a partition is the cost of a path from the left edge to the right edge. Since each interval has two costs, the path into an interval is also important.

   | _--------------O                   |        
   |/                                   |                     
  /|  _-----O-------|--------->O        |                       
 O-|-/           _--|--------/          |
  \|       |    /   |         |         |                     
   |~--O---|-->O----|--->O----|---->O   |

The generic decision step is: assume the current node's children contain the minimal cost of a path from them to the right. Compare these costs and ADD the smaller to the full and asym costs of the current node. We process intervals in order of the x-coordinate of their center, starting at the right.

INPUTS: dy_gs_save -- a structure holding memory for the costs, branching and ordering of interval. See the declaration of the structure Dyadic_Gsearch_Save for the full description of its fields.

OUTPUTS: returns the minimal cost, and also stores it in dy_gs_save.costfull[dy_gs_save.best_level][0]. To determine the partition chosen, start at [best_level][0] and follow .branching[][], which has values one of {RIGHTEDGE=0, SAMESIZE=1, UPSIZE=2}

Here is the caller graph for this function:

void dyadic_gsearch_free ( Dyadic_Gsearch_Save * dy_gs_save )

This function frees the structure Dyadic_Gsearch_Save used by dyadic_gsearch and initialized by dyadic_gsearch_init.

INPUTS: dy_gs_save -- a pointer to the structure to free

OUTPUT: none

Here is the caller graph for this function:

void dyadic_gsearch_init	(	Dyadic_Gsearch_Save *	dy_gs_save,
		int	num_levels
	)

This function initializes the structure Dyadic_Gsearch_Save for use by dyadic_gsearch. Memory can be freed by dyadic_gsearch_free.

INPUTS:

dy_gs_save -- a pointer to the structure to fill in
num_levels -- the number of dyadic levels to use

OUTPUT: dy_gs_save -- memory is allocated and some things filled in.

Here is the caller graph for this function:

void evaluate_intlist	(	REAL *	outfcn,
		REAL *	incoef,
		Local_Trig_Interval *	interval_list,
		int	num_intervals
	)

This routine does a LCT/DCT evaluation based on a list of intervals.

INPUTS:

outfcn -- where to put the function constructed. We ADD onto outfcn
incoef - the coefficients to evaluate. destroyed
interval_list-- An array of Local_Trig_Interval's, encoding what intervals to evaluate, how to evaluate, and where to put the result
num_intervals - the number of intervals

OUTPUTS: outfcn is filled in. incoef is destroyed

NOTES:

evaluates using the dual bell

Here is the call graph for this function:

Here is the caller graph for this function:

void expand_d_intlist	(	REAL *	outcoef,
		REAL *	infcn,
		Local_Trig_Interval *	interval_list,
		int	num_intervals
	)

This routine does a LCT/DCT expansion on a list of intervals.

This version expands using the DUAL bell. Otherwise it is just like expand_intlist.

See expand_intlist for a full description.

Here is the call graph for this function:

Here is the caller graph for this function:

void expand_intlist	(	REAL *	outcoef,
		REAL *	infcn,
		Local_Trig_Interval *	interval_list,
		int	num_intervals
	)

This routine does a LCT/DCT expansion on a list of intervals.

INPUTS:

outcoef -- where to put the coefficients constructed.
infcn - the fcn to expand
interval_list-- An array of Local_Trig_Interval's, encoding what intervals to expand, how to expand, and where to put the result -num_intervals - the number of intervals

OUTPUTS: outcoef is filled in. infcn is untouched

Here is the call graph for this function:

Here is the caller graph for this function:

void fast1_fromfile	(	int *	numpts_ptr,
		int *	band_limit_ptr,
		REAL **	node_ptr,
		REAL **	weight_ptr,
		Pm_1D **	pmn_matrix_ptr,
		Pm_1D_Workspace *	pmnwork,
		FILE *	fp
	)

This routine loads the initialization dumped by dump_fast1, interprets it and does all the set-up necessary to do the synthesis or analysis transform

INPUTS: fp -- the objects are read from the file pointed to by fp, in binary format. The file must be opened before calling this routine.

OUTPUTS:

numpts -- the number of points on the full interval.
band_limit -- the bound on the degree of the expansion. In $P^m_n$ notation this bounds n.
node -- an array of length numpts with the nodes, usually Arcsin of the usual gaussian nodes.
weight -- an array of length numpts with the quadrature weights
pmn_matrix -- an array of band_limit (==order_limit) Pm_1D structures, each representing the compressed matrix for one m.
pmn_work -- a structure holding workspace and other precomputed values used by the transform, independent of m

NOTES:

node and weight are not needed for the synthesis/analysis routines but are provided here for other uses.

Here is the call graph for this function:

REAL fast1_tofile	(	int	numpts,
		int	band_limit,
		REAL *	node,
		REAL *	weight,
		REAL	epsilon,
		FILE *	fp
	)

This routine initializes the fast transform for spherical harmonics (version 1) and dumps it to a file. This version is designed for the novice user, and so doesn't allow tuning.

INPUTS:

numpts -- the number of points on the full interval. Must be divisible by 4. More factors of 2 will improve performance.
band_limit -- the bound on the degree of the expansion. In $P^m_n$ notation this bounds n.
node -- an array of length numpts with the nodes, usually Arcsin of the usual gaussian nodes.
weight -- an array of length numpts with the quadrature weights
epsilon -- the level at which to truncate. The error in the transform will be slightly larger than this

OUTPUTS:

fp -- the objects created are written onto the file pointed to by fp, in binary format. The file must be opened before calling this routine.
returns the compression ratio, which is an upper bound on the speed up factor of using this algorithm

NOTES:

node and weight are input so the user can select.
we hard-wire the largest number of levels legal. This will slow initialization but is the best thing for novice users
The safety factor on the bell may not be quite right
we leak a little bit of memory from the trig plans

Here is the call graph for this function:

void fold_e_e_1s	(	REAL *	coef,
		REAL *	fcn,
		int	numpts,
		REAL *	left,
		int	n_left,
		REAL *	right,
		int	n_right,
		REAL *	bell,
		int	dir
	)

Computes the folded version of a function, which is then usually DCTed also can unfold

INPUTS:

coef -- a pointer to the left edge of the array for the folded function "coefficients"
fcn -- a pointer to the left end of the main interval
numpts -- the length of the main interval
left -- a pointer to the right edge of the interval that sits to the left of the main interval left will be called with negative indices
n_left-- the length of left to use. The bell spills this much into left, and the same amount into fcn.
right -- a pointer to the left edge of the interval that sits to the right of the main interval
n_right-- the length of right to use. The bell spills this much into right, and the same amount into fcn.
bell --- an array containing the bell values
dir -- which operation to do:
- 1 : fold
- -1 : unfold

OUTPUT:

if dir==1: coef -- the folded version of the function. length numpts is written on
if dir==-1: fcn,left,right -- the unfolded version of the function is ADDED to these arrays
else : no action

NOTES:

If all the data is contiguous, we will have right=fcn+numpts and left=fcn-1;
We could incorporate a skip if the data is transposed.

Here is the caller graph for this function:

void fold_e_o_1s	(	REAL *	coef,
		REAL *	fcn,
		int	numpts,
		REAL *	left,
		int	n_left,
		REAL *	right,
		int	n_right,
		REAL *	bell,
		int	dir
	)

Computes the folded version of a function, which is then usually LCTed also can unfold.

INPUTS:

coef -- a pointer to the left edge of the array for the folded function "coefficients"
fcn -- a pointer to the left end of the main interval
numpts -- the length of the main interval
left -- a pointer to the right edge of the interval that sits to the left of the main interval. left will be called with negative indices.
n_left-- the length of left to use. The bell spills this much into left, and the same amount into fcn.
right -- a pointer to the left edge of the interval that sits to the right of the main interval
n_right-- the length of right to use. The bell spills this much into right, and the same amount into fcn.
bell --- an array containing the bell values
dir -- which operation to do:
- 1 : fold
- -1 : unfold

OUTPUT:

if dir==1: coef -- the folded version of the function. length numpts is written on
if dir==-1: fcn,left,right -- the unfolded version of the function is ADDED to these arrays
else : no action

NOTES:

If all the data is contiguous, we will have right=fcn+numpts and left=fcn-1;
We could incorporate a skip if the data is transposed

Here is the caller graph for this function:

void free_bdmat_memory ( Banded_Matrix * in )

This routine frees the memory that was given to a Banded_Matrix by init_bdmat_memory.

INPUT: in -- the Banded_Matrix to free. it must have been initialized by init_bdmat_memory, or we will attempt to free things not gotten by calloc, probaby causing catastrophic failure.

NOTES:

if used incorrectly, this routine may yield nasty memory bugs

Here is the caller graph for this function:

void free_dmat_memory ( Dense_Matrix * in )

This routine frees the memory that was given to a Dense_Matrix by init_dmat_memory.

INPUT: in -- the Dense_Matrix to free. it must have been initialized by init_dmat_memory, or we will attempt to free things not gotten by calloc, probaby causing catastrophic failure.

NOTES:

If used incorrectly, this routine may yield nasty memory bugs.

Here is the caller graph for this function:

void ftgaqd	(	int *	nlat,
		REAL *	theta,
		REAL *	wts,
		REAL *	work,
		int *	lwork,
		int *	ierror
	)

This routine emulates gadq in the SPHEREPACK library

INPUTS:

nlat -- the number of gaussian colatitudes in the interval (0,pi) (between the two poles). nlat must be greater than zero.
work -- a temporary work space NOT USED
lwork -- the length of the work space NOT USED

OUTPUTS:

theta -- a REAL vector of length nlat containing the nlat gaussian colatitudes on the sphere in increasing radians in the interval (0,pi). Memory must already be allocated
wts -- a REAL vector of length nlat containing the nlat gaussian weights.Memory must already be allocated
ierror -- given value
- 0 no errors
- 2 if nlat<=0

NOTES:

the precision to which to compute the gaussian nodes is hard-wired at 1e-15.
If I make it ftgaqd_ can I call it from fortran??
Where is this routine used??

Here is the call graph for this function:

char* getoption	(	const char *	flag,
		int	argc,
		char **	argv
	)

This routine allows command line options, usually of the form: program -d 256. Normal usage in the program is:

if(getoption("-d",argc,argv)!=NULL) 
   degree=atoi(getoption("-d",argc,argv));

The first call makes sure the option flag was indeed used. If it was, we call again to get the value as a string, then convert it using ato_ .

void give_bdmat_memory	(	Banded_Matrix *	current,
		Banded_Matrix *	previous
	)

This routine sets up memory for the next Banded_Matrix we wish to use.

It looks at the previous Banded_Matrix we used, and increments fields

INPUT: previous -- the last Banded_Matrix we used

OUTPUT: current -- the Banded_Matrix we wish to use next

We set the .band_p, .band_list, and .matrix fields to the next available ints and REAL. .max_size, .max_rows, and .max_bands are set to account for the amount used in previous. also set: .non_zero=0; .n_row=-1 .n_col=0

Here is the caller graph for this function:

void give_dmat_memory	(	Dense_Matrix *	current,
		Dense_Matrix *	previous
	)

This routine sets up memory for the next Dense_Matrix we wish to use.

It looks at the previous Dense_Matrix we used, and increments fields

INPUT: previous -- the last Dense_Matrix we used.

OUTPUT: current -- the Dense_Matrix we wish to use next. We align current->matrix to the first space not used by previous->matrix, and set current->max_size to account for the amount used in previous.

Here is the caller graph for this function:

void give_dvect_memory	(	Dense_Vector *	current,
		Dense_Vector *	previous
	)

This routine sets up memory for the next Dense_Vector we wish to use. It looks at the previous Dense_Vector we used, and increments fields

INPUT: previous -- the last Dense_Vector we used

OUTPUT: current -- the Dense_Vector we wish to use next We align current->vector to the first space not used by previous->vector, and set current->max_size to account for the amount used in previous.

Here is the caller graph for this function:

void gnode_to_Pmninit	(	REAL ***	sofar,
		REAL **	ex,
		REAL **	why,
		int **	size,
		REAL *	gnode,
		int	hemipts
	)

Converts gnodes to the initialization needed for next_rtsinPmncos.

Takes the place of the initialization step in next_rtsinPmncos(....,-1)

INPUTS:

sofar-- a 2xhalfpts matrix used to store intermediate results
ex -- an array of length halfpts containing
why-- an array of length halfpts containing
size--an array of integers of length halfpoints used to store the exponent of the power of 2 removed to stabilize our function values
hemipts-- the number of point in [0,PI/2]. We use parity for [PI/2,PI]

OUTPUTS : This function creates the arrays above:

ex - will be filled in with cos(gnode[i]).
why -- will be filled in with sin(gnode[i]).

void gsearch_to_ilist	(	Local_Trig_Interval *	ilist_out,
		int *	intervals_kept,
		int	max_intervals,
		Local_Trig_Interval *	ilist_in,
		Dyadic_Gsearch_Save *	dy_gs_save,
		int	halfpts
	)

This function converts the choices made by the dyadic_gsearch to a list of intervals.

INPUTS:

max_intervals -- the largest number of interval ilist_out can hold
ilist_in -- all possible intervals, in Local_Trig_Interval format
dy_gs_save -- a pointer to gsearch structure From its best_level and branching fields we extract the partition.
halfpts -- the numpts of data points this search covered In the usual case where the grow search is for half the true function interval, this is half the number of points.

OUTPUT:

ilist_out -- written onto with the list of interval. Note that we copy the structure Local_Trig_Interval, which includes pointers to other things. We do not copy the arrays they point to
intervals_kept -- the number of intervals kept is written here

NOTES:

It is possible to run over ilist_in
In 2007 fixed bug i=i++ --> i++

Here is the caller graph for this function:

Banded_Matrix init_bdmat_memory	(	int	max_size,
		int	max_rows,
		int	max_bands
	)

This routine gets a large amount of memory for Banded_Matrix's, and gives it to a single Banded_Matrix to hold. This special Banded_Matrix acts as a pointer to this block of memory, and also keeps track of how large this memory is.

INPUT:

max_size -- the amount of memory to get for matrix
max_rows -- the amount of memory to get for band_p
max_bands -- (1/3) the amount of memory to get for band_list

OUPUT: returns a Banded_Matrix with

.max_size=max_size
.max_rows=max_rows+1
.max_bands=max_bands;
.non_zero=0;
.n_row=-1 This ruse allows give_bdmat_memory to pass along all the memory instead of losing one row
.n_col=0
.band_p pointing to max_rows+1 ints
.band_list pointing to 3*max_bands ints
.matrix pointing to max_size REALs

Here is the caller graph for this function:

void init_direct1	(	Pm_Direct_d **	out,
		int	halfpts,
		int	band_limit,
		REAL *	node
	)

This routine initializes for the transform for spherical harmonics, direct version 1. This version simple precomputes the Pm matrices and stores them in dense matrix format.

INPUTS:

halfpts -- half the number of points on the full interval. This implies the full number of points is even, and we also assume the nodes are symmetric. Currrently halfpts must also be even for our trig transforms to work.
band_limit -- the bound on the degree of the expansion. In $P^m_n$ notation this bounds n. For gaussian nodes this must be < 2*halfpts and for other nodes it should be even smaller. We confirn band_limit < 2*halfpts but leave the rest to the user.
node -- an array with the nodes, usually Arcsin of the usual gaussian nodes. We use halfpts of these, representing half of the interval.

OUTPUTS: out -- memory is allocated for band_limit (==order_limit) Pm_Direct_d structures, each representing the matrix for one m. Various bits of memory are allocated for these, and values filled in.

Here is the call graph for this function:

Here is the caller graph for this function:

Dense_Matrix init_dmat_memory ( unsigned int n )

This routine gets a large amount of memory for Dense_Matrix's, and gives it to a single Dense_Matrix to hold. This special Dense_Matrix acts as a pointer to this block of memory, and also keeps track of how large this memory is.

INPUT: n -- the amount of memory to get

OUPUT: returns a Dense_Matrix with

.max_size=n
.n_row=0
.n_col=0
.matrix pointing to memory for n REAL's

Here is the caller graph for this function:

Dense_Vector init_dvect_memory ( unsigned int n )

This routine gets a large amount of memory for Dense_Vector's, and gives it to a single Dense_Vector to hold. This special Dense_Vector acts as a pointer to this block of memory, and also keeps track of how large this memory is.

INPUT: n -- the amount of memory to get

OUPUT: returns a Dense_Vector with

.max_size=n
.n_row=0
.vector pointing to memory for n REAL's

Here is the caller graph for this function:

void init_fast1	(	Pm_1D **	out,
		Fast1_Save *	saves_fordump,
		int	halfpts,
		int	band_limit,
		REAL *	node,
		int *	numlevels,
		REAL	epsilon,
		REAL(*)(int, int)	bell_choice
	)

This routine initializes for the fast transform for spherical harmonic, version 1.

Version 1 compresses in 1D (each Pmn separately) and chooses a single partition for each m and parity. By choosing a single partition we minimize the trig transform cost, but sacrifice precision. This is appropriate for small problems, near the break-even point, since it reduces overhead.

INPUTS:

halfpts -- half the number of points on the full interval. This implies the full number of points is even, and we also assume the nodes are symmetric. Currrently halfpts must also be even for our trig transforms to work.
band_limit -- the bound on the degree of the expansion. In $P^m_n$ notation this bounds n. For gaussian nodes this must be < 2*halfpts and for other nodes it should be even smaller. We confirn band_limit < 2*halfpts but leave the rest to the user.
node -- an array with the nodes, usually Arcsin of the usual gaussian nodes. We use halfpts of these, representing half of the interval.
bell_choice -- a pointer to the bell function to use. This should be tuned to the desired precision. The choice of bell is subtle, and handled elsewhere.
numlevels -- the number of dyadic levels to use when trying to adapt. halfpts must be divisible by 2^numlevels. If numlevels is input too large or negative, this routine will choose the maximum allowed.
epsilon -- the target precision. The fast algorithm is an approximate method, tunable to the desired precision. The algorithm is faster the less precision (larger epsilon but < 1) you request. On the other hand, doubling the number of digits slows the algorithm by less than a factor of 2, so asking for a little extra is safe. If epsilon is chosen too close to the round-off error, the algorithm will degenerate to direct matrix applications.

OUTPUTS:

out -- memory is allocated for band_limit (==order_limit) Pm_1D structures, each representing the compressed matrix for one m. Various bits of memory are allocated for these, and values filled in.
saves_fordump -- bell values and total length are filled in here so we can dump them to file efficiently.
numlevels -- is modified to the maximum allowed number if the input number was illegal.

NOTES:

halfpts must be even for this routine to function. Performance (speed) of the algorithm will improve if halfpts has more factors of 2. For best performance, the non-power-of-two factor in halfpts should be small (10 or so).
output ? total coefficients,bands,rows,intervals ? modified num_levels ?some holder for the trig plans
leaks some memory
right FFTW flags? import and export wisdom?
min_gap hard wired
bdmat_raw may not have enough bands

Here is the call graph for this function:

Here is the caller graph for this function:

void init_Pm_1d_workspace	(	Pm_1D_Workspace *	out,
		int	numpts,
		int	band_limit,
		REAL *	node,
		REAL *	weight
	)

This routine initializes a structure Pmncos_1D_Workspace, which is used to hold workspace and a few precomputed values for use in the synthesis/analysis routines.

INPUTS:

numpts -- the number of points in the interval (number of latitudes) used here to determine memory needed.
band_limit -- the limit on the degree of the expansion. Used here to determine the maximal number of coefficients possible for one order.
node -- an array of numpts REALs specifying the discretization. Usually this will be gaussian nodes.
weight -- an array of numpts REALs specifying the quadrature weight. Usually this will be gaussian weight.

OUTPUT: out -- memory is allocated for the fields of out, and some values are precomputed.

NOTE:

probably should pack the memory together

Here is the call graph for this function:

Here is the caller graph for this function:

void lctw	(	REAL *	fcn,
		Ftrigtw_Plan *	main_plan
	)

Computes the local cosine transform of fcn and puts the coeficients back into fcn.

The local cosine transform is also known as the type 4 discrete cosine transform.

This is the LCT at half integer points. Is its own inverse.

INPUTS:

fcn array containing the data
main_plan -- a structure holding precomputed trig functions, the plan used by fftw, and the number of points; initialized by lctw_init.

OUTPUT: fcn. Puts the coefficients in fcn. specifically: \[ f(k) =norm \sum_{j=0}^{numpts-1} f(j) \cos\left(\pi\frac{(k+1/2)(j+1/2)}{numpts}\right) \]

         numpts-1              (k+0.5)  (j+0.5)
  fcn[k]= SUM   fcn[j] cos( PI ----------------  ) *norm
         j=0                         numpts

set for $L^2$ normalization.

NOTES:

comment better
requires even numpts. checked by lctw_init

Here is the caller graph for this function:

void lctw_init	(	Ftrigtw_Plan *	main_plan,
		int	flags,
		int	numpts
	)

This routine initializes for lctw

We initialize trigonometic things used by the lct and dct portions, and call the initailizer for the fftw real transform (forward).

INPUTS:

main_plan -- a structure that will hold the things we generate Memory for its fields is allocated in this routine.
flags -- flags passed directly to the rfftw initializer
numpts -- the number of points on which we will transform MUST be even

OUTPUTS: main_plan is filled in:

NOTES:

comment better

Here is the caller graph for this function:

void load_bin_bdmat	(	Banded_Matrix *	x,
		FILE *	in
	)

This routine boads a Banded_Matrix to a file in binary format, as written by dump_bin_bdmat

INPUTS:

x -- the Banded_Matrix. Should be initialized as follows
- .max_rows is the amount of row memory available
- .band_p points to .max_rows ints
- .max_bands is the amount of band memory available
- .band_list points to 3*.max_bands ints
- .max_size is the amount of main memory available
- .matrix points to .max_size REAL
in -- A pointer to the file to read from. Setting this to stdin makes it read from the screen. If in==NULL we use stdin. The reading mode ("rb") is determined outside of this routine.

OUTPUT: x -- is filled in.

Here is the caller graph for this function:

void load_bin_dmat	(	Dense_Matrix *	x,
		FILE *	in
	)

This routine loads a Dense_Matrix from a file in binary format, as written by dump_bin_dmat.

INPUTS:

x -- the Dense_Matrix to be filled in. Should be initialized with
- .max_size indicating the amount of memory available
- .matrix pointing to max_size REALs
in -- A pointer to the file to read from. Setting this to stdin makes it read from the screen. If in==NULL we use stdin. The reading mode ("rb") is determined outside of this routine.

OUTPUT: x -- is filled in.

Here is the caller graph for this function:

void load_direct1	(	int *	numpts_ptr,
		int *	band_limit_ptr,
		REAL **	node_ptr,
		REAL **	weight_ptr,
		Pm_Direct_d **	pmn_matrix_ptr,
		FILE *	fp
	)

This routine loads the initialization dumped by dump_direct1 We interpret what we read and re-align pointers and such to get things ready to use

INPUTS: fp -- the objects are read from the file pointed to by fp, in binary format. The file must be opened before calling this routine.

OUTPUTS:

numpts -- the number of points on the full interval.
band_limit -- the bound on the degree of the expansion. In P^m_n notation this bounds n.
node -- an array of length numpts with the nodes, usually Arcsin of the usual gaussian nodes.
weights -- an array of length numpts with the quadrature weights
pmn_matrix -- an array of band_limit (==order_limit) Pm_Direct_d structures, each representing the matrix for one m.

Here is the call graph for this function:

Here is the caller graph for this function:

void load_fast1	(	int *	numpts_ptr,
		int *	band_limit_ptr,
		int *	num_levels_ptr,
		REAL **	node_ptr,
		REAL **	weight_ptr,
		Fast1_Save *	saves_fordump,
		Pm_1D **	pmn_matrix_ptr,
		FILE *	fp
	)

This routine loads the initialization dumped by dump_fast1.

We interpret what we read and re-align pointers and such to get things ready to use.

INPUTS: fp -- the objects are read from the file pointed to by fp, in binary format. The file must be opened before calling this routine

OUTPUTS:

numpts -- the number of points on the full interval.
band_limit -- the bound on the degree of the expansion. In P^m_n notation this bounds n.
num_levels -- the number of dyadic levels used in the adaption
node -- an array of length numpts with the nodes, usually Arcsin of the usual gaussian nodes.
weights -- an array of length numpts with the quadrature weights
saves_fordump -- a structure containing the information needed for the bells. Constructed by init_fast1
pmn_matrix -- an array of band_limit (==order_limit) Pm_1D structures, each representing the compressed matrix for one m.

NOTES:

num_levels is not needed by the synthesis/analysis routines. It may be useful to keep it so we could re-dump.
saves_for dump is also not needed. we could free it here.
we free plan_holder, thus losing our handle on the trig plans This may cause memory leaks since we can't free them easily
We now have separate workspace for each trig plan, but these could be combined.

Here is the call graph for this function:

Here is the caller graph for this function:

void ltrigint_init	(	Local_Trig_Interval *	out,
		int	numpts,
		int	left_extra,
		int	right_extra,
		int	fcn_offset,
		int	coef_offset,
		REAL(*)(int, int)	bell_choice,
		int	which_transform,
		int	fftw_flags
	)

This routine initializes a Local_Trig_Interval.

INPUTS:

numpts
left_extra
right_extra
fcn_offset
coef_offset
which_transform -- are copied into the structure
bell_choice -- a pointer to the bell function to use
fftw_flags -- flags passed to the fftw initializer

OUTPUTS: out -- is written onto with the above info.

Memory is obtained for .bell and .dualbell and they are initialized.
.transform_plan is initialized for the transform indicated by which_transform.

NOTES:

document better

Here is the call graph for this function:

Here is the caller graph for this function:

void mk_gnode_gweight	(	REAL *	gnode,
		REAL *	gweight,
		REAL	epsilon,
		int	numpts
	)

This routine constructs the gaussian nodes and weights.

We make cosine inverse of the ordinary gaussian nodes.

INPUTS:

gnode -- the array of length numpts in which to put the gaussian nodes. Memory must already be allocated.
gweight -- the array in which to put the gaussian weights. Memory must already be allocated.
epsilon -- the precision to which to compute the gaussian nodes.
numpts -- the order of the nodes and weights we compute this is the length of gnode and gweight.

OUTPUTS:

gnode -- the numpts gaussian node values are written on gnode We make cosine inverse of the ordinary gaussian nodes.
gweight -- numpts values of gweight are filled in

Here is the call graph for this function:

Here is the caller graph for this function:

void next_rtsinPmncos	(	REAL *	outfcn,
		Pmncos_Recurrence_Save *	pmn_save,
		int	halfpts,
		int	order,
		int	index
	)

Do one step in the recurrence process then output the results.

This saves on memory if we are trying to compress all associated Legendre fcns of some order. This version is super stabilized for all conditions.

INPUTS:

out-- the results will be put in this array of length halfpts, in format out[i]=C^m_n P^m_n(pmn_save->ex[i])*sqrt(pmn_save->why[i]). Generally pmn_save->ex[i] is cos(quadrature_point[i]) pmn_save->ex[i] is sin(quadrature_point[i])
pmn_save -- a structure that holds some precomputed trig values and the results of previous recurrences. Must be initialized with Pmncos_resave_init.
halfpts-- the number of points to use. Generally this is the number of points in [0,PI/2] and we use parity for [PI/2,PI]. This routine does not care, however, because the location of the points is encoded in pmn_save.
order -- the order of the associated legendre function desired. In P^m_n notation this is m
index--- the index desired minus m. In P^m_{m+k} notation this is k.

OUTPUTS : results are output in outfcn, above

USAGE: You must call this function for successive values of index, starting at 0. Order must remain fixed.

NOTES:

We pass type REAL to several functions which expect double. This should not cause trouble but still might.
Is this the correct norm fix ??

Here is the caller graph for this function:

void Pm_1d_analyse	(	REAL *	outcoef,
		Pm_1D *	inmat,
		REAL *	infcn,
		Pm_1D_Workspace *	pmnwork
	)

Expands (analyses) a function into spherical harmonic coefficients.

This routine only does one order (m) so it is really only an Associaded Legendre Function analyser.

INPUTS:

inmat -- The precomuted matrix of Associated Legendre functions of the desired order in compressed Pm_1D format
infcn -- an array of REALs to hold the function to expand. Its length is encoded in the number of columns in inmat.
pmnwork -- a structure that holds workspace and a few global precomputed values.

OUTPUTS: outcoef -- an array of the coefficients computed. The information on how many of these to compute is encoded by the number of rows in inmat.

NOTES:

We could check that (numbas[0]==numbas[1])||(numbas[0]==numbas[1]+1)
?UNROLL the parity LOOP ONCE DEBUGGED?

Here is the call graph for this function:

void Pm_1d_synthesize	(	REAL *	outfcn,
		Pm_1D *	inmat,
		REAL *	incoef,
		Pm_1D_Workspace *	pmnwork
	)

Evaluates (synthesizes) a function from spherical harmonic coefficients.

This routine only does one order (m) so it is really only an Associaded Legendre Function synthesizer.

INPUTS:

inmat -- The precomuted matrix of Associated Legendre functions of the desired order in compressed Pm_1D format
incoef -- an array of the coefficients to use. The information on how many of these to use is encoded by the number of rows in inmat.
pmnwork -- a structure that holds workspace and a few global precomputed values

OUTPUTS: outfcn -- an array of REALs to hold the computed function. Its length is encoded in the number of columns in inmat.

NOTES:

we could check that (numbas[0]==numbas[1])||(numbas[0]==numbas[1]+1)

Here is the call graph for this function:

void Pm_direct1_analyse	(	REAL *	outcoef,
		Pm_Direct_d *	inmat,
		REAL *	infcn,
		Pm_1D_Workspace *	pmnwork
	)

Expand (analyses) a function into spherical harmonic coefficients.

This routine only does one order (m) so it is really only an Associated Legendre Function analyser.

INPUTS:

inmat -- The precomuted matrix of Associated Legendre functions of the desired order in Pm_Direct_d format
infcn -- an array of REALs to hold the function to expand. Its length is encoded in the number of columns in inmat
pmnwork -- a structure that holds workspace and a few global precomputed values

OUTPUTS: outcoef -- an array of the coefficients computed. The information on how many of these to compute is encoded by the number of rows in inmat.

NOTES:

We could check that (numbas[0]==numbas[1])||(numbas[0]==numbas[1]+1)
?UNROLL the parity LOOP ONCE DEBUGGED?

Here is the call graph for this function:

void Pm_direct1_synthesize	(	REAL *	outfcn,
		Pm_Direct_d *	inmat,
		REAL *	incoef,
		Pm_1D_Workspace *	pmnwork
	)

Evaluates (synthesizes) a function from spherical harmonic coefficients.

This routine only does one order (m) so it is really only an Associaded Legendre Function synthesizer.

INPUTS:

inmat -- The precomuted matrix of Associated Legendre functions of the desired order in Pm_Direct_d format
incoef -- an array of the coefficients to use. The information on how many of these to use is encoded by the number of rows in inmat.
pmnwork -- a structure that holds workspace and a few global precomputed values

OUTPUTS: outfcn -- an array of REALs to hold the computed function. Its length is encoded in the number of columns in inmat

NOTES:

We could check that (numbas[0]==numbas[1])||(numbas[0]==numbas[1]+1)

Here is the call graph for this function:

REAL Pmncos	(	int	order,
		int	index,
		REAL	phi
	)

Evaluate an associated Legendre function P^m_n at any point.

This routine is super stabilized for all conditions.

INPUTS:

order -- the order of the associated legendre function you want. In P^m_n notation this is m. order>=0
index -- Which frequency to use, starting from the lowest legal. In P^m_n notation, n is m+index. index >=0
phi -- The point at which to evaluate. 0< phi < PI

OUTPUTS: returns $cP^m_n(\cos(\phi))$ with c to make L^2 normalized.

                                m
                               P (cos(phi)) * const
                                n

The constant is set so we are L^2 normalized. Since the number of points to be used is not known, we cannot adjust for the discretization.

NOTES:

We pass type REAL to several functions which expect double. This should not cause trouble but still might.
Just like dbrtsinPmncos except for sqrt(sin) factor.

void Pmncos_resave_destroy ( Pmncos_Recurrence_Save * pmn_save )

destroy (free) a structure Pmncos_Recurrence_Save

INPUTS: pmn_save -- the structure to free

OUTPUTS : The memory held by pmn_save is freed

Here is the caller graph for this function:

void Pmncos_resave_init	(	Pmncos_Recurrence_Save *	pmn_save,
		REAL *	node,
		int	numpts
	)

Converts a set of nodes (often gaussian) to the initialization needed for the associated legendre function recurrence.

INPUTS:

pmn_save -- a structure to hold the outputs
node -- and array holding the sampling nodes. The array is of length (numpts+1)/2 and the values should be in [0,PI/2]. We assume the sampling on (PI/2,PI] is symmetric
numpts -- the number of sample points. We use half of these (plus 1 if odd)

OUTPUTS : This function creates memory for the arrays in pmn_save and initializes some values.

Here is the caller graph for this function:

void pointwise_multiply	(	REAL *	out,
		REAL *	in_1,
		REAL *	in_2,
		int	numpts
	)

Pointwise multiply two vectors to form the third.

INPUTS:

out -- a vector of length numpts in which to put the output. May be the same as in_1 or in_2
in_1 -- the first input vector, of length numpts
in_2 -- the second input vector, of length numpts
numpts -- the length of the vectors

OUTPUTS: out -- is written onto; out[i]=in_1[i]*in_2[i];

Here is the caller graph for this function:

void reflect_evenodd	(	REAL *	even,
		REAL *	odd,
		REAL *	whole,
		int	numpts,
		int	dir
	)

Form the even and odd parts of the input vector.

It can also reverse the process.

INPUTS:

even -- In forward mode, the even portion will be put here. In reverse mode, the even portion is read from here. Of length (numpts+1)/2.
odd -- In forward mode, the odd portion will be put here. In reverse mode, the odd portion is read from here. Of length numpts/2.
whole -- in forward mode, the fcn to read from. In reverse mode, where to put the function. Of length numpts.
numpts -- the length of whole and twice the length of even and odd. If numpts is odd, the extra point goes to even.
dir -- the direction to go in
- dir==1 fold whole into even/odd
- dir==-1 unfold even/odd into whole
- else error

OUTPUTS:

In forward mode even and out are written onto, with the even/odd reflections of whole:
- even[i]=whole[i]+whole[numpts-1-i]
- odd[i]=whole[i]-whole[numpts-1-i]
If the number of points is odd, the center point is copied into even, and omitted from odd.
in reverse mode undoes this process

NOTES:

the exception handling for odd numpts might slow this down

Here is the caller graph for this function:

void remove_rootsin	(	REAL *	out,
		REAL *	in,
		REAL *	node,
		int	numpts
	)

Divide the vector in by sqrt(sin( node values)).

INPUTS:

out -- a pointer to where to put the outputs. May be the same as in.
in -- the input zector, of length numpts
node -- and array holding the sampling nodes, corresponding to numpts physical points. Should be in (0,PI)
numpts -- the number of sample points. (plus 1 if odd)

OUTPUTS : out -- is written onto out[i]=in[i]/sqrt(sin(node[i]))

NOTES:

For safety we do sqrt(fabs(sin(node[i]))) This is so if the node value is out of range the routine doesn't crash
We could still crash by dividing be zero

Here is the caller graph for this function:

void rfftw_to_NR_format	(	REAL *	data_nr,
		REAL *	data_rfftw,
		int	numpts,
		int	dir
	)

Translates between the formats for the real discrete fourier transform used by the rfftw package and the Numerical Recipies routines. This helps allow the NR DCT routine to call the rfftw.

INPUTS:

data_nr -- The data in the nr format: real[0],real[n/2],real[1],imag[1]... ...real[n/2-1],imag[n/2-1]
data_rfftw--The data in the rfftw format: real[0],real[1],...real[n/2], imag[(n+1)/2-1],....,imag[1]
numpts -- the length n of the vectors; MUST be even
dir
- 1 for rfftw -> nr
- -1 for nr -> rfftw

OUTPUTS:

if dir==1, data_nr is written onto
if dir==-1, data_rfftw is written onto

NOTES:

we must have REAL==fftw_real
there is a sign convention difference on the imaginary parts realfft_pre(,,,,1); versus rfftw_one(rw_plan_forward,,);

Here is the caller graph for this function:

double second ( )

return the current time for performance timings

Data Structures

Defines

Functions

Detailed Description

Define Documentation

Function Documentation