/*------------------------------------------*/
/* File : jpeg.c, main for jfif decoder		*/
/* Author : Pierre Guerrier, march 1998		*/
/*                                       	*/
/* 19/01/99  Edited by Koen van Eijk 		*/
/*									  		*/
/* 05/01/06  Adapted for Topfield by DMK	*/
/*------------------------------------------*/



#include <stdio.h>
#include "TAP.h"



/*----------------------------------*/
/* JPEG format parsing markers here */
/*----------------------------------*/

//#define RST_MK(x)	( (0xFFF8&(x)) == 0xFFD0 )	// is x a restart interval ?
#define RST_MK(x)	( (0xF8&(x)) == 0xD0 )		// is x a restart interval ?

#define M_SOF0  0xC0		// Start Of Frame N
#define M_SOF1  0xC1		// N indicates which compression process
#define M_SOF2  0xC2		// Only SOF0-SOF2 are now in common use
#define M_SOF3  0xC3
#define M_DHT	0xC4
#define M_SOF5  0xC5		// NB: codes C4 and CC are NOT SOF markers
#define M_SOF6  0xC6
#define M_SOF7  0xC7
#define M_SOF9  0xC9
#define M_SOF10 0xCA
#define M_SOF11 0xCB
#define M_SOF13 0xCD
#define M_SOF14 0xCE
#define M_SOF15 0xCF
#define M_SOI   0xD8		// Start Of Image (beginning of datastream)
#define M_EOI   0xD9		// End Of Image (end of datastream)
#define M_SOS   0xDA		// Start Of Scan (begins compressed data)
#define M_DQT	0xDB
#define M_DRI	0xDD
#define M_APP0	0xE0		// Application-specific marker, type N
#define M_APP1	0xE1
#define M_APP12	0xEC		// (we don't bother to list all 16 APPn's)
#define M_MSK	0xF0
#define M_COM   0xFE		// COMment



// Defines for TAP environment
#define	printf TAP_Print



/*------------------------------------------------------*/
/* all kinds of macros here								*/
/*------------------------------------------------------*/

#define first_quad(c)   ((c) >> 4)        // first 4 bits in file order
#define second_quad(c)  ((c) & 15)

#define HUFF_ID(hclass, id)       (2 * (hclass) + (id))

#define DC_CLASS        0
#define AC_CLASS        1



/*------------------------------------------------------*/
/* JPEG data types here									*/
/*------------------------------------------------------*/

// Read cursor over an in-memory JPEG stream.
typedef struct DECODER_DATA
{
	unsigned char * data;			// next byte to read; NEXTBYTE() advances it by one
	unsigned char * data_start;		// first byte of the buffer; decode_jpg() points data here to (re)start
} DECODER_DATA;

// Block of 64 pixel-space (8-bit) values.
// The two members overlay the same storage: block[r][c] and linear[8*r + c]
// name the same byte.  Also used to hold quantization tables (see QTable).
typedef union {		// block of pixel-space values
	unsigned char	block[8][8];
	unsigned char	linear[64];
} PBlock;

// Block of 64 frequency-space (DCT coefficient) values.
// As with PBlock, block[r][c] and linear[8*r + c] are the same int.
typedef union {		// block of frequency-space values
	int block[8][8];
	int linear[64];
} FBlock;



// Component descriptor: one entry per image component (see comp[]).
typedef struct {
	unsigned char	CID;		// component ID as read from the frame header
	unsigned char	IDX;		// index of this component's first block in MCU_buff[] (set by init_MCU)

	unsigned char	HS;			// horizontal sampling factor (blocks per MCU, horizontally)
	unsigned char	VS;			// vertical sampling factor (blocks per MCU, vertically)
	unsigned char	HDIV;		// right-shift (0 or 1) turning an MCU pixel column into a sample column
	unsigned char	VDIV;		// right-shift (0 or 1) turning an MCU pixel row into a sample row

	char			QT;			// QTable index, 2bits
	char			DC_HT;		// DC table index, 1bit
	char			AC_HT;		// AC table index, 1bit
	int				PRED;		// DC predictor value (running DC of the previous block)
} cd_t;



/*--------------------------------------*/
/* Parse Variables						*/
/*--------------------------------------*/

static unsigned char bit_count;	// available bits in the window
static unsigned char window;



/*--------------------------------------*/
/* private huffman.c defines and macros */
/*--------------------------------------*/

#define HUFF_EOB		0x00
#define HUFF_ZRL		0xF0



/*------------------------------------------*/
/* some constants for on-the-fly IQ and IZZ */
/*------------------------------------------*/

// Zig-zag reordering table: G_ZZ[k] is the position inside an FBlock's
// linear[] array where the k-th coefficient decoded from the stream is
// stored (see unpack_block).
static const int G_ZZ[] = {
   0,  1,  8, 16,  9,  2,  3, 10,
  17, 24, 32, 25, 18, 11,  4,  5,
  12, 19, 26, 33, 40, 48, 41, 34,
  27, 20, 13,  6,  7, 14, 21, 28,
  35, 42, 49, 56, 57, 50, 43, 36,
  29, 22, 15, 23, 30, 37, 44, 51,
  58, 59, 52, 45, 38, 31, 39, 46,
  53, 60, 61, 54, 47, 55, 62, 63
};



/*--------------------------------------*/
/* private huffman.c defines and macros */
/*--------------------------------------*/

// Number of HTable words sacrificed to bookkeeping:
#define GLOB_SIZE		32

// Memory size of HTables:
#define MAX_SIZE(hclass)		((hclass)?384:64)

// Available cells, top of storage:
#define MAX_CELLS(hclass)	(MAX_SIZE(hclass) - GLOB_SIZE)

// for Huffman tree descent
// lower 8 bits are for value/left son

#define GOOD_NODE_FLAG		0x100
#define GOOD_LEAF_FLAG		0x200
#define BAD_LEAF_FLAG		0x300
#define SPECIAL_FLAG		0x000
#define HUFF_FLAG_MSK		0x300

#define HUFF_FLAG(c)		((c) & HUFF_FLAG_MSK)
#define HUFF_VALUE(c)		((unsigned char)( (c) & (~HUFF_FLAG_MSK) ))



/*----------------------------------------------*/
/* some static structures for tree_vld storage	*/
/*----------------------------------------------*/

//static unsigned int	DC_Table0[MAX_SIZE(DC_CLASS)], DC_Table1[MAX_SIZE(DC_CLASS)];

//static unsigned int	AC_Table0[MAX_SIZE(AC_CLASS)], AC_Table1[MAX_SIZE(AC_CLASS)];

//static unsigned int	*HTable[4] = { &DC_Table0[0], &DC_Table1[0], &AC_Table0[0], &AC_Table1[0] };
static unsigned int	HTable[4][384];



/*----------------------------------------------*/
/* some static structures for fast_int_idct		*/
/*----------------------------------------------*/

#define Y(i,j)		Y[8*i+j]
#define X(i,j)		(output->block[i][j])

// This version is IEEE compliant using 16-bit arithmetic.

// The number of bits coefficients are scaled up before 2-D IDCT:
#define S_BITS	         3
// The number of bits in the fractional part of a fixed point constant:
#define C_BITS		14

#define SCALE(x,n)	((x) << (n))

// This version is vital in passing overall mean error test.
#define DESCALE(x, n)	(((x) + (1 << ((n)-1)) - ((x) < 0)) >> (n))

#define ADD(x, y)	((x) + (y))
#define SUB(x, y)	((x) - (y))
#define CMUL(C, x)	(((C) * (x) + (1 << (C_BITS-1))) >> C_BITS)

// Butterfly: but(a,b,x,y) = rot(sqrt(2),4,a,b,x,y)
#define but(a,b,x,y)	{ x = SUB(a,b); y = ADD(a,b); }



/*----------------------------------------------*/
/* some static structures for color				*/
/*----------------------------------------------*/

// Clamp n to the range 0..255 (note: n is evaluated more than once).
#define Saturate(n)	((n) > 0 ? ((n) < 255 ? (n) : 255) : 0)



/*------------------------------------------------------*/
/* JPEG global variables here							*/
/*------------------------------------------------------*/

int verbose = 1;				// For debugging only.  Verbose = 1 gives debug info
int x_size, y_size;				// image width and height in pixels, read from the SOF0 header
int MCU_valid[10];				// per MCU block slot: owning component index, -1 for unused slots / terminator
int n_comp;						// number of components 1,3
cd_t comp[3];					// descriptors for 3 components
int MCU_sx, MCU_sy;				// MCU size in pixels
PBlock *MCU_buff[10];			// decoded component buffer between IDCT and color convert
int mx_size, my_size;			// picture size in units of MCUs (rounded up)
int	MCU_row, MCU_column;		// current position in MCU unit
int	in_frame;					// frame started ?
int curcomp;					// MCU block slot currently being decoded (loop variable of process_MCU)
FBlock *FBuff = NULL;			// scratch frequency buffer
PBlock *PBuff = NULL;			// scratch pixel buffer
byte *FrameBuffer = NULL;		// complete decoded image, n_comp bytes per pixel
byte *ColorBuffer = NULL;		// MCU after color conversion
int rx_size, ry_size;			// down-rounded Video frame size in pixel units, multiple of MCU
PBlock *QTable[4];				// up to four quantization tables, indexed by table id
int QTvalid[4];					// non-zero once the matching QTable entry has been loaded
								// NOTE(review): decode_jpg declares a local QTvalid[4] that shadows this one

// process statistics
int stuffers;			// number of stuff bytes in file
int passed;				// number of bytes skipped looking for markers

// Converted output image built by BMP_save() and released by free_jpg().
word *_decodeBuf = NULL;
//static unsigned char * _decodeBuf = NULL;



/*------------------------------------------------------*/
/* PROGRAM STARTS HERE									*/
/*------------------------------------------------------*/


// Release the converted image buffer (_decodeBuf), if one is held, and
// clear the pointer so a repeated call is harmless.
void free_jpg( void )
{
	if( _decodeBuf == NULL )
		return;

	TAP_MemFree( _decodeBuf );
	_decodeBuf = NULL;
}



// Debug hook called whenever decoding gives up on the stream.
// It only logs (when verbose is set); it does not unwind or free anything.
// Delete aborted stream function after debugging. - DMK
void aborted_stream( )
{
	if( !verbose )
		return;

	printf("Aborted Stream.\n");
}



// Returns ceil(N/D) for any non-zero D.
// Integer division truncates toward zero, so the quotient is already the
// ceiling when the exact result is negative.  It only needs bumping up
// when there is a remainder and the exact result is positive, i.e. when
// N and D have the same sign.  D must not be zero.
int ceil_div(int N, int D)
{
	int q = N / D;

	if ((N % D != 0) && ((N < 0) == (D < 0)))
		q++;
	return q;
}



// Returns floor(N/D) for any non-zero D.
// Integer division truncates toward zero, so the quotient is already the
// floor when the exact result is positive.  It only needs stepping down
// when there is a remainder and the exact result is negative, i.e. when
// N and D have opposite signs.  D must not be zero.
int floor_div(int N, int D)
{
	int q = N / D;

	if ((N % D != 0) && ((N < 0) != (D < 0)))
		q--;
	return q;
}



// Transform JPEG number format into usual 2's-complement format.
//   S    - the raw 'good'-bit value read from the stream
//   good - number of significant bits (the coefficient category)
// Returns 0 when good is 0.  Otherwise values with the top bit set
// (S >= 2^(good-1)) are positive and returned unchanged; smaller values
// encode negatives and map to S - (2^good - 1).
// The subtraction is written without left-shifting -1, which is undefined
// behaviour in C.
int reformat(unsigned long S, int good)
{
	unsigned long half;

	if (!good)
		return 0;

	half = 1UL << (good - 1);			// 2^(good-1)
	if (S < half)
		return (int) S - (int) ((1UL << good) - 1);

	return (int) S;
}



/* NEXTBYTE:
 *  Returns the byte at the stream cursor and moves the cursor forward by
 *  one.  No end-of-buffer check is made; the caller must not read past
 *  the data it supplied.
 */
int NEXTBYTE( DECODER_DATA * x )
{
	byte value = *x->data++;

	return value;
}



// Read one byte from the stream and return it as an int.
// Despite the original intent, no end-of-data test is performed here.
int read_1_byte ( DECODER_DATA * x )
{
	return NEXTBYTE( x );
}



// Read two bytes from the stream and return them as one 16-bit value,
// first byte in the high-order position (big endian).
// No end-of-data test is performed.
int read_2_bytes ( DECODER_DATA * x )
{
	unsigned int high, low;

	// Two separate statements so the bytes are fetched in stream order.
	high = (unsigned int) NEXTBYTE( x );
	low  = (unsigned int) NEXTBYTE( x );

	return (high << 8) | low;
}



// Read 'number' bits from the entropy-coded stream, most significant bit
// first, and return them right-aligned.  Returns 0 when number is 0.
//
// State lives in the file-level 'window' (unread bits of the current
// byte, left-aligned) and 'bit_count' (how many of them remain).
// A 0xFF data byte must be followed by a stuffed 0x00, which is skipped
// and counted in 'stuffers'.  Any other byte after 0xFF is a marker or a
// stream error: the problem is logged and 0 is returned, abandoning the
// bits gathered so far.
// NOTE: there is no end-of-buffer check; the byte readers never report EOF.
unsigned long get_bits( DECODER_DATA * fi, int number)
{
	int i, newbit;
	unsigned long result = 0;
	unsigned char aux, wwindow;

	for (i = 0; i < number; i++)
	{
		if (bit_count == 0)
		{
			// Window exhausted: fetch the next data byte.
			wwindow = read_1_byte(fi);

			if (wwindow == 0xFF)
			{
				aux = read_1_byte(fi);

				switch (aux)
				{
					case 0xFF:
						printf("ERROR:\tRan out of bit stream\n");
						aborted_stream();
						return 0;

					case 0x00:		// skip stuffer 0 byte
						stuffers++;
					break;

					default:
						if (RST_MK(0xFF00 | aux))
							printf("ERROR:\tSpontaneously found restart!\n");
						printf("ERROR:\tLost sync in bit stream\n");
						aborted_stream();
						return 0;
				}
			}

			bit_count = 8;
		}
		else
			wwindow = window;

		// Take the top bit and keep the rest for later calls.
		newbit = (wwindow>>7) & 1;
		window = wwindow << 1;
		bit_count--;
		result = (result << 1) | newbit;
	}
	return result;
}



// Discard any bits still pending in the bit window, so the next
// get_bits()/get_one_bit() call starts by fetching a fresh byte.
void clear_bits(void)
{
	bit_count = 0;
}



// Read a single bit (0 or 1) from the entropy-coded stream.
//
// Shares 'window' and 'bit_count' with get_bits().  A 0xFF data byte must
// be followed by a stuffed 0x00, which is skipped and counted in
// 'stuffers'.  Any other byte after 0xFF is a marker or a stream error:
// the problem is logged and 0 is returned, exactly as get_bits() does,
// instead of carrying on as though the 0xFF were valid data.
// NOTE: there is no end-of-buffer check; the byte readers never report EOF.
unsigned char get_one_bit( DECODER_DATA * fi )
{
	int newbit;
	unsigned char aux, wwindow;

	if (bit_count == 0)
	{
		// Window exhausted: fetch the next data byte.
		wwindow = read_1_byte( fi );

		if (wwindow == 0xFF)
		{
			aux = read_1_byte( fi );

			switch (aux)
			{
				case 0xFF:
					printf("ERROR:\tRan out of bit stream\n");
					aborted_stream();
					return 0;

				case 0x00:		// skip stuffer 0 byte
					stuffers++;
				break;

				default:
					if (RST_MK(0xFF00 | aux))
						printf(	"ERROR:\tSpontaneously found restart!\n");
					printf("ERROR:\tLost sync in bit stream\n");
					aborted_stream();
					return 0;
			}
		}

		bit_count = 8;
	}
	else
		wwindow = window;

	// Take the top bit and keep the rest for later calls.
	newbit = (wwindow >> 7) & 1;
	window = wwindow << 1;
	bit_count--;
	return newbit;
}



// Read a marker segment's 16-bit size field.
// Thin wrapper: returns read_2_bytes( fi ), i.e. the next two stream
// bytes combined with the first byte in the high-order position.
unsigned int get_size( DECODER_DATA * fi )
{
	return read_2_bytes( fi );
}



/*
 * Read the initial marker, which should be SOI.
 * For a JFIF file, the first two bytes of the file should be literally
 * 0xFF M_SOI.  To be more general, we could use next_marker, but if the
 * input file weren't actually JPEG at all, next_marker might read the whole
 * file and then return a misleading error message...
 *
 * Always consumes two bytes.  Returns M_SOI on success, 0 otherwise.
 */
int first_marker ( DECODER_DATA * data )
{
	int lead = read_1_byte( data );
	int code = read_1_byte( data );

	return ( lead == 0xFF && code == M_SOI ) ? code : 0;
}



// Find the next marker in the Jpg header, logging progress when verbose.
// Skips bytes until a 0xFF is seen, then returns the first non-0xFF byte
// that follows it (the marker code).  No end-of-buffer check is made.
int next_marker ( DECODER_DATA * dec )
{
	int cur;
	int skipped = 0;

	cur = read_1_byte( dec );

	if( verbose )
		printf( "Finding Next Marker.\nDiscarding: " );

	// Phase 1: count and skip everything up to the first 0xFF.
	for ( ; cur != 0xFF; skipped++ )
	{
		cur = read_1_byte( dec );

		if( verbose )
			printf( "%X ", cur );
	}

	if( verbose )
		printf( "\nDiscarded %d bytes.\n  Reading: ", skipped );

	// Phase 2: extra 0xFF bytes are legal padding, so swallow them (without
	// counting them as discarded) until the marker code byte turns up.
	for (;;)
	{
		cur = read_1_byte( dec );

		if( verbose )
			printf( "%X ", cur );

		if( cur != 0xFF )
			break;
	}

	if( verbose )
		printf( "\nReturning %X.\n", cur );

	return cur;
}



/*
 * Most types of marker are followed by a variable-length parameter segment.
 * This routine skips over the parameters for any marker we don't otherwise
 * want to process.
 * Note that we MUST skip the parameter segment explicitly in order not to
 * be fooled by 0xFF bytes that might appear within the parameter segment;
 * such bytes do NOT introduce new markers.
 */
static void skip_variable ( DECODER_DATA * dec )
// Skip over an unknown or uninteresting variable-length marker.
// Reads the 2-byte length and then discards (length - 2) payload bytes.
// A length below 2 is invalid (the count includes itself); it is reported
// and nothing further is consumed, because subtracting 2 from the
// unsigned length would wrap around and skip gigabytes of memory.
{
	unsigned int length;

	// Get the marker parameter length count
	length = read_2_bytes( dec );

	// Length includes itself, so must be at least 2
	if (length < 2)
	{
		printf("ERROR:\tErroneous JPEG marker length\n");
		return;
	}
	length -= 2;

	// Skip over the remaining bytes
	while (length > 0)
	{
		(void) read_1_byte( dec );
		length--;
	}
}



/*----------------------------------------------*/
/* rules for color conversion:					*/
/*  r = y		+1.402	v						*/
/*  g = y -0.34414u	-0.71414v					*/
/*  b = y +1.772  u								*/
/* Approximations: 1.402 # 7/5 = 1.400			*/
/*		.71414 # 357/500 = 0.714				*/
/*		.34414 # 43/125	= 0.344					*/
/*		1.772  = 443/250						*/
/*----------------------------------------------*/
/* Approximations: 1.402 # 359/256 = 1.40234	*/
/*		.71414 # 183/256 = 0.71484				*/
/*		.34414 # 11/32 = 0.34375				*/
/*		1.772 # 227/128 = 1.7734				*/
/*----------------------------------------------*/

// Convert the current MCU from its three component planes (MCU_buff[],
// components 0/1/2 = Y/Cb/Cr) into interleaved 3-byte pixels in
// ColorBuffer, stored B, G, R.  Subsampled components are expanded by
// shifting the pixel coordinates with each component's HDIV/VDIV.
void color_conversion(void)
{
	int row, col, c;
	int src_row[3];				// sample row per component for this pixel row
	int blk_base[3];			// first MCU_buff slot of the block row holding it
	int src_col;
	unsigned char sample[3];	// Y, Cb, Cr of the current pixel
	signed char cb_c, cr_c;		// chroma re-centred around zero
	long red, green, blue;
	long out;

	for (row = 0; row < MCU_sy; row++)   // pixel rows
	{
		for (c = 0; c < 3; c++)
		{
			src_row[c]  = row >> comp[c].VDIV;
			blk_base[c] = comp[c].IDX + comp[c].HS * (src_row[c] >> 3);
		}

		out = 3 * (row * MCU_sx);

		for (col = 0; col < MCU_sx; col++, out += 3)   // pixel columns
		{
			for (c = 0; c < 3; c++)
			{
				src_col = col >> comp[c].HDIV;
				sample[c] = MCU_buff[blk_base[c] + (src_col >> 3)]->block[src_row[c] & 7][src_col & 7];
			}

			cb_c = sample[1] - 128;
			cr_c = sample[2] - 128;

			// Fixed-point forms of 1.402, 0.34414, 0.71414 and 1.772.
			red   = sample[0] + ((359 * cr_c) >> 8);
			green = sample[0] - ((11 * cb_c) >> 5) - ((183 * cr_c) >> 8);
			blue  = sample[0] + ((227 * cb_c) >> 7);

			// note that this is SunRaster color ordering
			ColorBuffer[out + 0] = Saturate(blue);
			ColorBuffer[out + 1] = Saturate(green);
			ColorBuffer[out + 2] = Saturate(red);
		}
	}
}



/* Inverse 1-D Discrete Cosine Transform.
   Transforms the 8 values Y[0..7] in place.
   Result Y is scaled up by factor sqrt(8).
   Original Loeffler algorithm.

   but(a,b,x,y) is the butterfly x = a-b, y = a+b.
   CMUL(C,x) multiplies x by the fixed-point constant C / 2^C_BITS with
   rounding; the integer constants below are therefore in units of 2^-14.
   Presumably rot(s, k, ...) in the original notes means a rotation by
   k*pi/16 scaled by s; the constants are consistent with that reading.
*/
static void idct_1d(int *Y)
{
	int z1[8], z2[8], z3[8];

	// Stage 1:
	but(Y[0], Y[4], z1[1], z1[0]);
	//* rot(sqrt(2), 6, Y[2], Y[6], &z1[2], &z1[3]);
	z1[2] = SUB(CMUL( 8867, Y[2]), CMUL(21407, Y[6]));
	z1[3] = ADD(CMUL(21407, Y[2]), CMUL( 8867, Y[6]));
	but(Y[1], Y[7], z1[4], z1[7]);
	/* z1[5] = CMUL(sqrt(2), Y[3]);
     z1[6] = CMUL(sqrt(2), Y[5]);
	*/
	z1[5] = CMUL(23170, Y[3]);
	z1[6] = CMUL(23170, Y[5]);

	// Stage 2:
	but(z1[0], z1[3], z2[3], z2[0]);
	but(z1[1], z1[2], z2[2], z2[1]);
	but(z1[4], z1[6], z2[6], z2[4]);
	but(z1[7], z1[5], z2[5], z2[7]);

	// Stage 3: even half passes through, odd half gets two rotations.
	z3[0] = z2[0];
	z3[1] = z2[1];
	z3[2] = z2[2];
	z3[3] = z2[3];
	// rot(1, 3, z2[4], z2[7], &z3[4], &z3[7]);
	z3[4] = SUB(CMUL(13623, z2[4]), CMUL( 9102, z2[7]));
	z3[7] = ADD(CMUL( 9102, z2[4]), CMUL(13623, z2[7]));
	// rot(1, 1, z2[5], z2[6], &z3[5], &z3[6]);
	z3[5] = SUB(CMUL(16069, z2[5]), CMUL( 3196, z2[6]));
	z3[6] = ADD(CMUL( 3196, z2[5]), CMUL(16069, z2[6]));

	// Final stage 4: write the results back over the input.
	but(z3[0], z3[7], Y[7], Y[0]);
	but(z3[1], z3[6], Y[6], Y[1]);
	but(z3[2], z3[5], Y[5], Y[2]);
	but(z3[3], z3[4], Y[4], Y[3]);
}



// Inverse 2-D Discrete Cosine Transform.
// Runs the 1-D transform over every row and then every column of the
// dequantized coefficients in 'input', adds the +128 level shift, clips to
// 0..255 and stores the pixels in 'output'.
void IDCT(const FBlock *input, PBlock *output)
{
	int work[64];		// row-major intermediate, scaled by 2^S_BITS
	int column[8];
	int row, col, pixel;
	int *line;

	// Pass 1: rows.  Prescale each row, then transform it in place.
	// Each pass scales the data up by a further factor of sqrt(8).
	for (row = 0; row < 8; row++)
	{
		line = &work[8 * row];

		for (col = 0; col < 8; col++)
			line[col] = SCALE(input->block[row][col], S_BITS);

		idct_1d(line);
	}

	// Pass 2: columns.  Gather, transform, then descale and clip.
	for (col = 0; col < 8; col++)
	{
		for (row = 0; row < 8; row++)
			column[row] = work[8 * row + col];

		idct_1d(column);

		for (row = 0; row < 8; row++)
		{
			// S_BITS undoes the prescale, 3 more bits undo sqrt(8)*sqrt(8);
			// 128 is the level shift.
			pixel = 128 + DESCALE(column[row], S_BITS+3);

			if (pixel <= 0)
				pixel = 0;
			else if (pixel >= 255)
				pixel = 255;

			output->block[row][col] = pixel;
		}
	}
}



/*----------------------------------------------------------*/
/* Loading of Huffman table, with leaves drop ability	    */
/*----------------------------------------------------------*/
// Parse one DHT segment, which may define several tables, and build a
// decoding tree for each in HTable[HUFF_ID(class, id)].
//
// Tree layout inside one HTable row:
//   - cells grow upwards from index 0 (the root); each cell carries a
//     flag in bits 8-9 and an 8-bit payload (leaf value, or half the
//     index of the node's left son);
//   - the top of the row holds per-depth bookkeeping (leaf and node
//     counts), which is why only MAX_CELLS(class) cells are usable.
// If a tree does not fit it is truncated: the remaining code values are
// still read to keep the stream in sync, and nodes whose sons would fall
// outside the table are marked BAD_LEAF_FLAG.
//
// Returns 0 on success, -1 when the table class or identity is out of
// range.  Both are checked before being used as an index, otherwise a
// corrupt header byte could address memory outside HTable.
int load_huff_tables( DECODER_DATA * data )
{
	unsigned char aux;			// unsigned so the nibbles can never be negative
	int size, hclass, id;
	int LeavesN, NodesN, CellsN;
	int MaxDepth, i, k, done;
	int NextCellPt;				// where shall we put next cell
	int NextLevelPt;			// where shall node point to
	unsigned int flag;

	size = get_size( data );	// this is the tables' size 

	size -= 2;

	while (size>0)
	{
		aux = read_1_byte( data );
		hclass = first_quad(aux);		// AC or DC
		id = second_quad(aux);			// table no 

		if( hclass>1 )
		{
			printf("ERROR:\tBad HTable class %d!\n",hclass);
			return -1;
		}

		if( id>1 )
		{ 
			printf("ERROR:\tBad HTable identity %d!\n",id);
			return -1;
		}

		id = HUFF_ID(hclass, id);
		if (verbose)
			printf("INFO:\tLoading Table %d\n", id);
		size--;
		CellsN = NodesN = 1;			// the root one
		LeavesN = 0;

 		for (i=0; i<MAX_CELLS(hclass); i++)
			HTable[id][i] = SPECIAL_FLAG;		// secure memory with crash value

		// first load the sizes of code elements
		// and compute contents of each tree level
		// Address	Content	
		// Top		Leaves 0
		// Top-1	Nodes  0
		// ......	.......	
		// Top-2k	Leaves k
		// Top-2k-1	Nodes  k

		MaxDepth = 0;

		for (i=0; i<16; i++)
		{
			LeavesN = HTable[id][MAX_SIZE(hclass)-2*i-1] = read_1_byte( data );
			CellsN = 2*NodesN;			// nodes is old
			NodesN = HTable[id][MAX_SIZE(hclass)-2*i-2] = CellsN-LeavesN;
			if (LeavesN)
				MaxDepth = i;
		}
		size-=16;

		// build root at address 0, then deeper levels at
		// increasing addresses until MAX_CELLS reached

		HTable[id][0] = 1 | GOOD_NODE_FLAG;		// points to cell _2_ !
		// we give up address one to keep left brothers on even addresses
		NextCellPt = 2;
		i = 0;			// this is actually length 1

		done = 0;

		while (i<= MaxDepth)
		{
			// then load leaves for other levels
			LeavesN = HTable[id][MAX_SIZE(hclass)-2*i-1];
			for (k = 0; k<LeavesN; k++)
				if (!done)
				{
					HTable[id][NextCellPt++] = read_1_byte( data ) | GOOD_LEAF_FLAG;	
					if (NextCellPt >= MAX_CELLS(hclass))
					{
						done = 1;
						printf("WARNING:\tTruncating Table at depth %d\n", i+1);
					}
				}
				else
					read_1_byte( data );	// throw it away, just to keep file sync

 			size -= LeavesN;

			if (done || (i == MaxDepth))
			{
				i++; continue;
			}
			// skip useless node building

			// then build nodes at that level
			NodesN = HTable[id][MAX_SIZE(hclass)-2*i-2];

			NextLevelPt = NextCellPt+NodesN;
			for (k = 0; k<NodesN; k++)
			{
				if (NextCellPt >= MAX_CELLS(hclass))
				{
					done = 1;
					break;
				}

				flag = ((NextLevelPt|1) >= MAX_CELLS(hclass)) ? BAD_LEAF_FLAG : GOOD_NODE_FLAG;
				// we OR by 1 to check even right brother within range
				HTable[id][NextCellPt++] = (NextLevelPt/2) | flag;
				NextLevelPt += 2;
			}

			i++;	// now for next level
		}	// nothing left to read from file after maxdepth

		if (verbose)
			printf("INFO:\tUsing %d words of table memory\n", NextCellPt);

	}	// loop on tables
	return 0;
}



/*-----------------------------------*/
/* extract a single symbol from file */
/* using specified huffman table ... */
/*-----------------------------------*/
// Walks the tree in HTable[select] from the root, one stream bit per
// level: a node's payload is half the index of its left son, so the next
// cell is (payload << 1) | bit.  Returns the leaf's value, or 0 when the
// walk ends on a truncated or unused cell (after logging it).
unsigned char get_symbol( DECODER_DATA * data, int select )
{
	int node = 0;					// this is the root cell
	unsigned int cell = HTable[select][node];
	unsigned int kind;

	while (HUFF_FLAG(cell) == GOOD_NODE_FLAG)
	{
		node = get_one_bit( data ) | (HUFF_VALUE(cell) << 1);
		cell = HTable[select][node];
	}

	kind = HUFF_FLAG(cell);

	if (kind == GOOD_LEAF_FLAG)
		return HUFF_VALUE(cell);

	if (kind == BAD_LEAF_FLAG)
	{
		// how do we fall back in case of truncated tree ?
		// suggest we send an EOB and warn
		printf("WARNING:\tFalling out of truncated tree !\n");
		return 0;
	}

	if (kind == SPECIAL_FLAG)
	{
		printf("ERROR:\tFound forbidden Huffman symbol !\n");
		aborted_stream();
	}

	return 0;
}



/*----------------------------------------------------------*/
/* initialise MCU block descriptors							*/
/*----------------------------------------------------------*/
// Using n_comp and the sampling factors already stored in comp[], this:
//   - assigns every 8x8 block of one MCU a slot in MCU_valid[]/MCU_buff[]
//     (component index per slot, -1 for unused slots) and allocates the
//     pixel block for each slot;
//   - records each component's first slot in comp[].IDX;
//   - derives the MCU size in pixels, the per-component subsampling
//     shifts, and the picture size in whole / rounded-up MCUs.
//
// Returns 0 on success and -1 on failure: unsupported component count,
// a zero sampling factor (which would later divide by zero), more than
// 9 blocks per MCU, or an allocation failure.  At most 9 slots are ever
// filled so that MCU_valid[] always keeps a -1 terminator.
int init_MCU( void )
{
	int i, j, k, n, hmax = 0, vmax = 0;

	for (i = 0; i < 10; i++)
		MCU_valid[i] = -1;

	// comp[] has only three entries.
	if (n_comp < 1 || n_comp > 3)
	{
		printf("ERROR:\tUnsupported number of components %d!\n", n_comp);
		return -1;
	}

	k = 0;

	for (i = 0; i < n_comp; i++)
	{
		if (comp[i].HS == 0 || comp[i].VS == 0)
		{
			printf("ERROR:\tBad sampling factors!\n");
			return -1;
		}

		if (comp[i].HS > hmax)
			hmax = comp[i].HS;
		if (comp[i].VS > vmax)
			vmax = comp[i].VS;
		n = comp[i].HS * comp[i].VS;

		comp[i].IDX = k;
		for (j = 0; j < n; j++)
		{
			// Slot 9 is reserved for the -1 terminator.
			if (k >= 9)
			{
				printf("ERROR:\tMax subsampling exceeded!\n");
				return -1;
			}

			MCU_buff[k] = (PBlock *) TAP_MemAlloc(sizeof(PBlock));

			if (MCU_buff[k] == NULL)
			{
				printf("ERROR:\tCould not allocate MCU buffers!\n");
				return -1;
			}

			// Mark the slot used only once its buffer really exists.
			MCU_valid[k] = i;
			k++;
		}
	}

	MCU_sx = 8 * hmax;
	MCU_sy = 8 * vmax;
	for (i = 0; i < n_comp; i++)
	{
		comp[i].HDIV = (hmax / comp[i].HS > 1);	// if 1 shift by 0
		comp[i].VDIV = (vmax / comp[i].VS > 1);	// if 2 shift by one
	}

	mx_size = ceil_div(x_size,MCU_sx);
	my_size = ceil_div(y_size,MCU_sy);
	rx_size = MCU_sx * floor_div(x_size,MCU_sx);
	ry_size = MCU_sy * floor_div(y_size,MCU_sy);

	return 0;
}



/*-------------------------------------------------*/
/* here we unpack, predict, unquantify and reorder */
/* a complete 8*8 DCT block ...			   */
/*-------------------------------------------------*/
// data   - stream cursor
// T      - receives the 64 dequantized coefficients, in natural
//          (de-zigzagged) order; always cleared first
// select - index into comp[]: picks Huffman tables, quantization table
//          and DC predictor
//
// The DC value is coded as a difference from the component's previous
// DC (comp[select].PRED, updated here).  AC symbols carry a zero-run in
// the high nibble and a bit-count in the low nibble.
//
// Corrupt input is contained rather than trusted: a missing or
// out-of-range quantization table leaves the block zeroed without reading
// any bits, and a run that would move past coefficient 63 ends the block
// instead of indexing outside G_ZZ[] and T.
void unpack_block( DECODER_DATA * data, FBlock *T, int select )
{
	unsigned int i, run, cat;
	int value;
	int qt;
	unsigned char	symbol;

	// Init the block with 0's:
	for (i=0; i<64; i++)
		T->linear[i] = 0;

	qt = comp[select].QT;
	if (qt < 0 || qt > 3 || QTable[qt] == NULL)
	{
		printf("ERROR:\tMissing quantization table %d!\n", qt);
		return;
	}

	// First get the DC coefficient:
	symbol = get_symbol( data , HUFF_ID(DC_CLASS, comp[select].DC_HT));
	value = reformat(get_bits( data, symbol), symbol);

	value += comp[select].PRED;
	comp[select].PRED = value;
	T->linear[0] = value * QTable[qt]->linear[0];

	// Now get all 63 AC values:
	for (i=1; i<64; i++)
	{
		symbol = get_symbol( data, HUFF_ID(AC_CLASS, comp[select].AC_HT));
		if (symbol == HUFF_EOB)
			break;
		if (symbol == HUFF_ZRL)
			{
				i += 15; continue;		// 16 zeros, counting the loop's own i++
			}
		cat = symbol & 0x0F;
		run = (symbol>>4) & 0x0F;
		i += run;
		value = reformat(get_bits( data, cat), cat);

		if (i >= 64)
		{
			printf("ERROR:\tAC coefficient index out of range!\n");
			break;
		}

		// Dequantify and ZigZag-reorder:
		T->linear[G_ZZ[i]] = value * QTable[qt]->linear[i];
	}
}



/*----------------------------------------------------------*/
/* this takes care for processing all the blocks in one MCU */
/*----------------------------------------------------------*/
// Decodes the MCU at (MCU_row, MCU_column), converts it to output pixels
// and copies the visible part into FrameBuffer, then advances
// MCU_column.  Row wrap-around is handled on entry.
// Returns 1 after decoding an MCU, or 0 (clearing in_frame) once the
// position has moved past the last MCU row.
int process_MCU( DECODER_DATA * data )
{
	int  line;
	int  rows_used, cols_used;
	long dst_off;
	int  src_off;

	// Step to the next MCU row when the previous call finished a row.
	if (MCU_column == mx_size)
	{
		MCU_column = 0;
		MCU_row++;
		if (MCU_row == my_size)
		{
			in_frame = 0;
			return 0;
		}
	}

	// Decode every block of the MCU; the slot's component index selects
	// the Huffman tables, quantization table and predictor.
	curcomp = 0;
	while (MCU_valid[curcomp] != -1)
	{
		unpack_block( data, FBuff, MCU_valid[curcomp]);
		IDCT(FBuff, MCU_buff[curcomp]);
		curcomp++;
	}

	// YCrCb to RGB color space transform here
	if (n_comp > 1)
		color_conversion();
	else
		memmove(ColorBuffer, MCU_buff[0], 64);

	// cut last row/column as needed
	rows_used = MCU_sy;
	if ((y_size != ry_size) && (MCU_row == (my_size - 1)))
		rows_used = y_size - ry_size;

	cols_used = MCU_sx;
	if ((x_size != rx_size) && (MCU_column == (mx_size - 1)))
		cols_used = x_size - rx_size;

	// Copy line by line: the frame and the MCU buffer have different strides.
	dst_off = n_comp * (MCU_row * MCU_sy * x_size + MCU_column * MCU_sx);
	src_off = 0;

	for (line = 0; line < rows_used; line++)
	{
		memmove(FrameBuffer + dst_off, ColorBuffer + src_off, n_comp * cols_used);
		dst_off += n_comp * x_size;
		src_off += n_comp * MCU_sx;
	}

	MCU_column++;
	return 1;
}



// Release every working buffer of the decoder (quantization tables,
// color/frame buffers, MCU blocks and the scratch blocks) and reset the
// pointers to NULL.  The converted image in _decodeBuf is NOT touched;
// free_jpg() releases that.
//
// Safe to call at any time, including before anything was allocated and
// more than once: each slot is released according to its own pointer
// rather than to QTvalid[] / the MCU_valid[] terminator, neither of
// which is reliable before init_MCU() has run or after an earlier call.
void free_structures(void)
{
	int i;

	for( i=0; i<4; i++ )
	{
		if( QTable[i] )
		{
			TAP_MemFree( QTable[i] );
			QTable[i] = NULL;
		}
		QTvalid[i] = 0;		// table is gone, so it is no longer valid
	}

	if( ColorBuffer )
	{
		TAP_MemFree( ColorBuffer );
		ColorBuffer = NULL;
	}

	if( FrameBuffer )
	{
		TAP_MemFree( FrameBuffer );
		FrameBuffer = NULL;
	}

	// MCU_buff has exactly 10 slots; walk them all.
	for( i=0; i<10; i++ )
	{
		if( MCU_buff[i] )
		{
			TAP_MemFree( MCU_buff[i] );
			MCU_buff[i] = NULL;
		}
	}

	if( FBuff )
	{
		TAP_MemFree( FBuff );
		FBuff = NULL;
	}

	if( PBuff )
	{
		TAP_MemFree( PBuff );
		PBuff = NULL;
	}
}



// Find the next marker in the stream (silent twin of next_marker).
// Skips bytes until a 0xFF is seen, then returns the first non-0xFF byte
// after it.  No end-of-buffer check is made.
static int get_next_MK( DECODER_DATA * dec )
{
	int cur;

	// Skip everything up to and including the first 0xFF.
	do
	{
		cur = read_1_byte( dec );
	}
	while (cur != 0xFF);

	// Extra 0xFF bytes are legal padding; the first other byte is the marker code.
	do
	{
		cur = read_1_byte( dec );
	}
	while (cur == 0xFF);

	return cur;
}



// Zero the DC predictor of all three component descriptors.
void reset_prediction(void)
{
	int idx = 3;

	while (idx-- > 0)
		comp[idx].PRED = 0;
}



/*----------------------------------------------------------*/
/* loading and allocating of quantization table             */
/* table elements are in ZZ order (same as unpack output)   */
/*----------------------------------------------------------*/
// Parse one DQT segment holding (size - 2) / 65 tables, each made of one
// precision/id byte followed by 64 one-byte entries.
//
// Returns 0 on success, -1 on a malformed segment or allocation failure:
//   - segment size below 2 (would wrap the unsigned table count);
//   - precision nibble other than 0 (only 8-bit tables are supported);
//   - table id above 3 (QTable[] has four slots);
//   - no memory for the table.
// A table that is defined again reuses its existing storage instead of
// leaking it.
int load_quant_tables(DECODER_DATA * fi)
{
	unsigned char aux;			// unsigned so the nibbles can never be negative
	unsigned int size, n, i, id, x;

	size = get_size( fi ); // this is the tables' size
	if (size < 2)
	{
		printf("ERROR:\tBad QTable segment size!\n");
		return -1;
	}
	n = (size - 2) / 65;

	for (i = 0; i < n; i++)
	{
		aux = read_1_byte( fi );
		if (first_quad(aux) > 0)
		{
			printf("ERROR:\tBad QTable precision!\n");
			return -1;
		}
		id = second_quad(aux);
		if (id > 3)
		{
			printf("ERROR:\tBad QTable identity %u!\n", id);
			return -1;
		}

		if (verbose)
			printf("INFO:\tLoading table %d\n", id);

		if (QTable[id] == NULL)
			QTable[id] = (PBlock *) TAP_MemAlloc(sizeof(PBlock));

		if (QTable[id] == NULL)
		{
			printf("ERROR:\tCould not allocate table storage!\n");
			QTvalid[id] = 0;
			return -1;
		}

		QTvalid[id] = 1;
		for (x = 0; x < 64; x++)
			QTable[id]->linear[x] = read_1_byte( fi );
		/*
			-- This is useful to print out the table content --
			for (x = 0; x < 64; x++)
			printf("%d\n", QTable[id]->linear[x]);
		*/
	}
	return 0;
}



/*-------------------------------------------------*/
/* convert the final RGB image into the pixel      */
/* buffer handed back to the caller (_decodeBuf)   */
/*-------------------------------------------------*/

// Altered for BMP - DMK
// Despite its name this writes nothing to disk: it allocates _decodeBuf
// with one element per pixel and fills it from FrameBuffer, top row
// first, packing each pixel with RGB8888().
//
// Does nothing if there is no FrameBuffer.  If the allocation fails,
// _decodeBuf is left NULL and the function returns without converting.
// The buffer is sized by the element type of _decodeBuf; sizing it in
// bytes under-allocates whenever that element is wider than one byte.
//
// NOTE(review): FrameBuffer is read as 3 bytes per pixel (B, G, R), which
// only matches a 3-component image - confirm callers never reach here for
// monochrome files.
// NOTE(review): any buffer already held in _decodeBuf is overwritten
// without being freed - confirm callers use free_jpg() first.
// NOTE(review): the RGB8888() result is stored into a 'word' element;
// confirm the intended pixel format against the TAP API.
void BMP_save()
{
	int i, j;
	int read_index = 0;
	int write_index = 0;
	unsigned char red, green, blue;

	if( verbose )
		printf( "Staring BMP Write.\n" );

	if( FrameBuffer == NULL )
		return;

	_decodeBuf = TAP_MemAlloc( (x_size) * y_size * sizeof(*_decodeBuf) );

	if( verbose )
	{
		if( _decodeBuf )
			printf( "Memory allocated successfully.\nWrite index: " );
		else
			printf( "Memory failed to Allocate.\nWrite index: " );
	}

	if( _decodeBuf == NULL )
		return;

	for( i=y_size; i>0; i-- )
	{
		for( j=0; j<x_size; j++ )
		{
			blue  = *(FrameBuffer + read_index);
			green = *(FrameBuffer + read_index + 1);
			red   = *(FrameBuffer + read_index + 2);

			*(_decodeBuf + write_index) = RGB8888( red, green, blue );

			read_index = read_index + 3;
			write_index++;
		}
	}

	if( verbose )
		printf( "\nFinished BMP Write.\n" );
}



// Start the decoding process.
word *decode_jpg( void * data, int start )
//unsigned char *decode_jpg( void * data, int start )
{
	unsigned int aux;
	unsigned int mark;
	int i,j;
	int QTvalid[4];
	int n_restarts, restart_interval, leftover;		// RST check
	static DECODER_DATA dec;

	if( data == NULL )
	{
		printf( "NO DATA - Decode jpg Routine\n" );
		return NULL;
	}

	// Set up data structure for decoding
	memset(&dec, 0, sizeof( &dec ));

	dec.data_start = data;
	dec.data = dec.data_start;

	// First find the SOI marker:
	aux = first_marker( &dec );
	if (aux != M_SOI)
	{
if( verbose )
 printf( "SOI marker not found.\n" );

		aborted_stream();
		return NULL;
	}

//	if ( start == 1 )
//		return aux;

	if( verbose )
		printf( "Found the SOI marker!\n" );

	// Setup variables
	in_frame = 0;
	restart_interval = 0;
	for (i = 0; i < 4; i++)
		QTvalid[i] = 0;

	// Now process segments as they appear:
	do 
	{
		mark = next_marker( &dec );

if( verbose )
 printf( "Next Marker is %X\n", mark );

		switch (mark) 
		{
			case M_SOF0:
				if ( verbose )
					printf("Found the SOF marker!\n");

				in_frame = 1;
				read_2_bytes( &dec );	// header size, don't care

				// load basic image parameters
				read_1_byte( &dec );	// precision, 8bit, don't care
				y_size = read_2_bytes( &dec );
				x_size = read_2_bytes( &dec );
				if ( verbose )
					printf("Image size is %d by %d\n", x_size, y_size);

// Return if only decoding image size.
if ( start == 1 )
{
 printf( "Freeing structures.\n" );
 free_structures();

 dec.data = dec.data_start;

 printf( "Returning from decode_jpg, start = %d\n\n", start );
 return (word *)1;;
}

				n_comp = read_1_byte( &dec );	// # of components
				if (verbose)
				{
					printf("INFO:\t");
					switch (n_comp)
					{
						case 1:
							printf("Monochrome");
						break;
						case 3:
							printf("Color");
						break;
						default:
							printf("Not a");
						break;
					}
					printf(" JPEG image!\n");
				}

				for (i = 0; i < n_comp; i++)
				{
					// component specifiers
					comp[i].CID = read_1_byte( &dec );
					aux = read_1_byte( &dec );
					comp[i].HS = first_quad(aux);
					comp[i].VS = second_quad(aux);
					comp[i].QT = read_1_byte( &dec );
				}
				if ((n_comp > 1) && verbose)
					printf("INFO:\tColor format is %d:%d:%d, H=%d\n",
				comp[0].HS * comp[0].VS,
				comp[1].HS * comp[1].VS,
				comp[2].HS * comp[2].VS,
				comp[1].HS);

				if (init_MCU() == -1)
				{
					aborted_stream();
return NULL;
    			}

				// dimension scan buffer for YUV->RGB conversion
//				FrameBuffer = (word *) malloc( (size_t) x_size * y_size * n_comp);
FrameBuffer = (byte *) TAP_MemAlloc( (size_t) x_size * y_size * n_comp);
//				ColorBuffer = (word *) malloc( (size_t) MCU_sx * MCU_sy * n_comp);
ColorBuffer = (byte *) TAP_MemAlloc( (size_t) MCU_sx * MCU_sy * n_comp);
//				FBuff = (FBlock *) malloc(sizeof(FBlock));
FBuff = (FBlock *) TAP_MemAlloc(sizeof(FBlock));
//				PBuff = (PBlock *) malloc(sizeof(PBlock));
PBuff = (PBlock *) TAP_MemAlloc(sizeof(PBlock));

				if ((FrameBuffer == NULL) || (ColorBuffer == NULL) || (FBuff == NULL) || (PBuff == NULL) ) 
				{
					printf("ERROR:\tCould not allocate pixel storage!\n");
//					exit(1);
return NULL;
				}
			break;

			case M_DHT:
				if (verbose)
					printf("INFO:\tDefining Huffman Tables\n");
				if (load_huff_tables( &dec ) == -1)
				{
					aborted_stream();
return NULL;
				}
			break;

			case M_DQT:
				if (verbose)
					printf("INFO:\tDefining Quantization Tables\n");
				if (load_quant_tables( &dec ) == -1)
				{
					aborted_stream();
return NULL;
				}
			break;

			case M_DRI:
				read_2_bytes( &dec );	// skip size
				restart_interval = read_2_bytes( &dec );
				if (verbose)
					printf("INFO:\tDefining Restart Interval %d\n", restart_interval);
			break;	

			case M_SOS:		// lots of things to do here
				if (verbose)
					printf("INFO:\tFound the SOS marker!\n");
				read_2_bytes( &dec ); // don't care
				aux = read_1_byte( &dec );
				if (aux != (unsigned int) n_comp)
				{
					printf("ERROR:\tBad component interleaving!\n");
					aborted_stream();
return NULL;
				}

				for (i = 0; i < n_comp; i++) 
				{
					aux = read_1_byte( &dec );
					if (aux != comp[i].CID)
					{
						printf("ERROR:\tBad Component Order!\n");
						aborted_stream();
return NULL;
					}
					aux = read_1_byte( &dec );
					comp[i].DC_HT = first_quad(aux);
					comp[i].AC_HT = second_quad(aux);
				}

				read_2_bytes( &dec );			// skip things
				read_1_byte( &dec );			// skip things

				MCU_column = 0;
				MCU_row = 0;
				clear_bits();
				reset_prediction();

				// main MCU processing loop here
				if (restart_interval)
				{
					n_restarts = ceil_div(mx_size * my_size, restart_interval) - 1;
					leftover = mx_size * my_size - n_restarts * restart_interval;
					// final interval may be incomplete

					for (i = 0; i < n_restarts; i++)
					{
						for (j = 0; j < restart_interval; j++)
							process_MCU( &dec );
							// proc till all EOB met

						aux = get_next_MK( &dec );
						if (!RST_MK(aux))
						{
							printf("ERROR:\tLost Sync after interval!\n");
							aborted_stream();
return NULL;
						}
//						else if (verbose)
//							printf("INFO:\tFound Restart Marker\n");

						reset_prediction();
						clear_bits();
					}		// intra-interval loop
				} 
				else
					leftover = mx_size * my_size;

				// process till end of row without restarts
				for (i = 0; i < leftover; i++)
					process_MCU( &dec );

				in_frame = 0;
			break;

			case M_EOI:
				if (verbose)
					printf("INFO:\tFound the EOI marker!\n");
				if (in_frame)
				{
					aborted_stream();
return NULL;
				}

				if (verbose)
					printf("INFO:\tTotal skipped bytes %d, total stuffers %d\n", passed, stuffers);

				BMP_save();

printf( "Freeing structures.\n" );
				free_structures();

//				printf("\nDone.\n");
printf( "Returning from decode_jpg, start = %d\n\n", start );
//				exit(0);
return _decodeBuf;
			break;

			case M_COM:
				if( verbose )
					printf( "INFO:\tSkipping comments\n" );
				skip_variable( &dec );
			break;

// Should never get here!
			case EOF:
				if( verbose )
					printf( "ERROR:\tRan out of input data!\n" );
				aborted_stream();
return NULL;

			default:
//				if ((mark & MK_MSK) == APP_MK)
				if ((mark & M_MSK) == M_APP0)
				{
					if( verbose )
						printf( "INFO:\tSkipping application data\n" );
					skip_variable( &dec );
					break;
				}
				if (RST_MK(mark))
				{
					reset_prediction();
					break;
				}
				// if all else has failed ...
				printf( "WARNING:\tLost Sync outside scan, %d!\n", mark );
				aborted_stream();
return NULL;
			break;
		} // end switch
	} 
	while (1);

printf( "Return _decodeBuf.\n" );

	if( _decodeBuf )
		return  _decodeBuf;
	else
		return NULL;
}



/*
 * Read a jpeg file from the Topfield HDD.
 *
 * The whole file is read into a buffer obtained from TAP_MemAlloc and then
 * handed to decode_jpg() with start == 1, i.e. the "size only" mode in which
 * the decoder returns as soon as it has parsed the image dimensions from the
 * SOF0 header.
 *
 * filename : name of the file to open with TAP_Hdd_Fopen
 * width    : receives x_size as set by decode_jpg
 * height   : receives y_size as set by decode_jpg
 *
 * Returns the buffer holding the raw (still compressed) file contents, or
 * NULL if the file cannot be opened, is empty, memory cannot be allocated,
 * or decode_jpg reports failure. On failure *width and *height are left
 * untouched.
 * NOTE(review): the returned buffer is presumably owned by the caller and
 * released with TAP_MemFree -- confirm against the call site.
 */
byte *load_jpg( char * filename, int * width, int * height )
{
	TYPE_File * file;
	long size;
	unsigned char * data;

	file = TAP_Hdd_Fopen( filename );
	if( file == NULL )
		return NULL;

	size = TAP_Hdd_Flen( file );

	// An empty file cannot hold a JPEG stream; do not allocate or parse it.
	if( size <= 0 )
	{
		TAP_Hdd_Fclose( file );
		return NULL;
	}

	data = (unsigned char *)TAP_MemAlloc( size );

	if (!data)
	{
		// Release the file handle on the allocation-failure path as well.
		TAP_Hdd_Fclose( file );
		return NULL;
	}

	TAP_Hdd_Fread( data, size, 1, file );
	TAP_Hdd_Fclose( file );

printf( "Decoding %s.\n", filename );

	// start == 1: only the image size is decoded.
	if( decode_jpg( data, 1 ) == NULL )
	{
		TAP_MemFree( data );
		return NULL;
	}

	*width = x_size;
	*height = y_size;

printf( "Returning to mp3show.\n\n" );

	return data;
}

