#include <stdlib.h>
#include <memory.h>
#include <string.h>
#include "plugin_api.h"

//-----------------------------------------------------------
// Some Useful Functions
//-----------------------------------------------------------


// Zero a plugin counter.
//									<<<<<
// counter_id:  selects the counter; it is handed to get_pcntr() together
//		with pluginId.
// NOTE(review): stats_regnum and stats_cerr are not declared in this
//		function; presumably get_pcntr() sets them up for the
//		following call -- confirm in plugin_api.h.
static __forceinline void
helper_plugin_cntr_zero( int counter_id ) {
    get_pcntr( pluginId, counter_id );
    WU_loadGlobalRegister( stats_regnum, 0, stats_cerr );	// load the value 0
}

// Copy a debug msg from sram to local memory and pass the copy to
// onl_api_debug_message().
//									<<<<<
// dbgmsg:  local-memory buffer that receives the copy and is then passed on
// mymsg:   message text in sram
// nbytes:  number of bytes copied from mymsg to dbgmsg
static __forceinline void
helper_dbg_message( __declspec(local_mem) char *dbgmsg,
			__declspec(sram) char *mymsg, unsigned int nbytes ) {
    memcpy_lmem_sram( dbgmsg, mymsg, nbytes );
    onl_api_debug_message( msgNextBlock, dbgmsg );
}

//
// Return the time elapsed between two timestamps, in msec.
//									<<<<<
// t2 is the later timestamp, t1 the earlier one.  The difference is
// multiplied by 16 and divided by 1.4*10^6 (done as /10^5 then /14), i.e.
// one timestamp tick is treated as 16 cycles of a 1.4 GHz clock.
// The quotient is truncated and returned as unsigned long.
static __forceinline unsigned long
diff_msec(	__declspec(local_mem) long long t2,
		__declspec(local_mem) long long t1 ) {
    __declspec(gp_reg)	  unsigned long elapsed_ms;
    long long		  cycles;

    cycles = (t2 - t1) << 4;			// 16*delta
    elapsed_ms = ( cycles / 100000 ) / 14;	// cycles/10^5/14
    return elapsed_ms;
}

// Convert msec to #ME cycles assuming a 1.4 GHz ME (1.4*10^6 cycles/msec).
//									<<<<<
// CAVEAT:  the product is formed in unsigned long and wraps silently.
// NOTE(review): the limit used to be documented as msec <= 49000.  If
//	unsigned long is 32 bits wide, 1400000*msec already exceeds 2^32
//	for msec > 3067 -- confirm which limit really applies.
//
static __forceinline unsigned long
msec2cycles ( __declspec(gp_reg) unsigned long msec ) {
    return msec * 1400000UL;			// 14 * 10^5 cycles per msec
}

//
// Return the time elapsed between two timestamps, in usec.
//									<<<<<
// t2 is the later timestamp, t1 the earlier one.  The difference is
// multiplied by 16 and divided by 1400, i.e. one timestamp tick is treated
// as 16 cycles of a 1.4 GHz clock.
// The intermediate result is kept in a long long (like diff_nsec) so that
// the value is not cut down to the width of unsigned long before it is
// returned; a negative difference therefore comes back negative.
static __forceinline long long
diff_usec(	__declspec(local_mem) long long t2,
		__declspec(local_mem) long long t1 ) {
    __declspec(gp_reg)	  long long usec;
    usec = ( (t2 - t1)<<4 )/1400;			// 16*delta/10^2/14
    return usec;
}

// Convert usec to #ME cycles assuming a 1.4 GHz ME (1400 cycles/usec).
//									<<<<<
static __forceinline long long
usec2cycles ( __declspec(gp_reg) long long usec ) {
    return usec * 1400;
}

//
// Return the time elapsed between two timestamps, in nsec.
//									<<<<<
// t2 is the later timestamp, t1 the earlier one.  The difference is
// multiplied by 16, then by 10, and divided by 14, i.e. one timestamp tick
// is treated as 16 cycles of a 1.4 GHz clock.
static __forceinline long long
diff_nsec(	__declspec(local_mem) long long t2,
		__declspec(local_mem) long long t1 ) {
    __declspec(gp_reg)	  long long cycles;

    cycles = (t2 - t1) << 4;			// 16*delta
    return ( cycles * 10 ) / 14;		// cycles*10/14
}

// Convert nsec to #ME cycles assuming a 1.4 GHz ME (14 cycles per 10 nsec).
//
// CAVEAT:  need at least 10 nsec; the quotient is truncated by the integer
//	    division.
//									<<<<<
static __forceinline long long
nsec2cycles ( __declspec(gp_reg) long long nsec ) {
    return ( nsec * 14 ) / 10;
}

// Wait for the packet signal.
//									<<<<<
// Thin wrapper:  passes the signal s to wait_for_all().
static __forceinline void
wait_packet_signal(SIGNAL *s) {
    wait_for_all(s);
}

// Send the packet signal on to the next packet thread.
//									<<<<<
// The calling context number is read with ctx().  Contexts from
// FIRST_PACKET_THREAD up to (but not including) LAST_PACKET_THREAD signal
// the next context; LAST_PACKET_THREAD signals FIRST_PACKET_THREAD, which
// closes the ring.  A context outside that range sends nothing.
// NOTE(review): __implicit_write(s) presumably tells the compiler that the
//	signal is written outside of its view -- confirm in the compiler
//	documentation.
static __forceinline void
send_packet_signal(SIGNAL *s) {
    int c;
  
    c = ctx();
#if ( (FIRST_PACKET_THREAD) == (LAST_PACKET_THREAD) )
    // only one packet thread is configured:  it signals itself
    if(c == FIRST_PACKET_THREAD)
    {
	signal_same_ME(__signal_number(s), FIRST_PACKET_THREAD);
	__implicit_write(s);
    }
#else
    // every packet thread but the last one signals its successor
    if(c >= FIRST_PACKET_THREAD && c < LAST_PACKET_THREAD) 
    {
	signal_same_ME_next_ctx(__signal_number(s));
	__implicit_write(s);
    }
    // the last packet thread wraps around to the first one
    else if(c == LAST_PACKET_THREAD)
    {
	signal_same_ME(__signal_number(s), FIRST_PACKET_THREAD);
	__implicit_write(s);
    }
#endif
}

// Same as dl_source_packet() but no pkt will be dequeued from the previous
//	processing block.
// When DL_ORDERED is defined the function waits for the source packet
// signal and then sends it on; otherwise it does nothing.
//									<<<<<
void dl_source_nopacket( void )
{
#ifdef DL_ORDERED
    // take our turn, then pass the turn on
    wait_packet_signal(&dl_source_packet_sig);
    send_packet_signal(&dl_source_packet_sig);
#endif
}

// Same as dl_sink_packet() but no pkt will be enqueued to the next
//	processing block.
// When DL_ORDERED is defined the function waits for the sink packet
// signal and then sends it on; otherwise it does nothing.
//
void dl_sink_nopacket( void )
{
#ifdef DL_ORDERED
    // take our turn, then pass the turn on
    wait_packet_signal(&dl_sink_packet_sig);
    send_packet_signal(&dl_sink_packet_sig);
#endif
}

//									<<<<<
// Return the address of the word that follows the current word, where a
// 	word is any char sequence terminated by the space character.  At
// 	most n characters starting at p are examined.  Return 0 if no word
// 	is found within that range.
// If the string is "xxx  yyy" with p pointing to the first 'x',
// 	helper_nxt_token( p, 7 ) returns addr of first 'y';
// 	helper_nxt_token( p, 4 ) returns 0.
// Only the space character is tested; a NUL byte inside the n characters
// 	is treated like any other non-space character.
// See helper_tokenize() for a function that prepares an entire string of
// words for getting multiple args with helper_atou_sram().
//
#define nxt_token	helper_nxt_token
static __forceinline __declspec(sram) char *
helper_nxt_token( __declspec(sram) char *p, __declspec(gp_reg) int n ) {
    __declspec(sram) char *limit;

    limit = p + n;

    // advance to the SP that ends the current word
    for( ; p<limit; p++ ) {
	if( *p == ' ' )	break;
    }
    if( p >= limit )	return 0;

    // step over that SP and any SP characters that follow it
    for( ++p; p<limit; ++p ) {
	if( *p != ' ' )	return p;	// 1st char of the next word
    }
    return 0;				// ran out of characters
}

//									<<<<<
// Convert the unsigned long 'x' to a decimal ascii string in the buffer
// 	starting at 'p' which is 'n' bytes long.
// 	The resulting string is left-justified and NUL-terminated.
//
// Returns the number of digits written, or -1 if the number (plus its NUL
// 	byte) does not fit into n bytes.  In the -1 case with n >= 2 the
// 	buffer holds the n-1 low-order digits followed by NUL; with n == 1
// 	it holds the empty string; with n <= 0 it is not touched.
//
// All writes stay inside p[0] .. p[n-1].  Bytes that follow the NUL byte
// 	are left over from the conversion and have no meaning.
//
static __forceinline __declspec(gp_reg) int
helper_ultoa_sram(	__declspec(sram) unsigned long x,
		__declspec(sram) char *p,
		__declspec(gp_reg) int n ) {
    __declspec(gp_reg) int i;
    __declspec(gp_reg) int r;
    __declspec(gp_reg) int first;
    __declspec(gp_reg) int ndigits;

    if( n < 2 ) {			// no room for one digit + NUL
	if( n == 1 )	*p = '\0';
	return -1;
    }

    // right-justified number w/ leading spaces, lowest digit first
    ndigits = 0;
    p[n-1] = '\0';
    for( i=n-2; i>=0; --i ) {
	if( (x > 0) || (ndigits == 0) ) {	// x == 0 still yields "0"
	    r = x%10;
	    p[i] = '0' + r;
	    ++ndigits;
	} else {
	    p[i] = ' ';
	}
	x = x/10;
    }
    if( x > 0 )		return -1;	// number too big

    // left-justify:  slide the digits and their NUL byte to the front
    first = n - 1 - ndigits;
    if( first > 0 ) {
	for( i=0; i<=ndigits; i++ ) {
	    p[i] = p[first+i];
	}
    }
    return ndigits;
}

// Convert a decimal ascii string to unsigned long.
//
// example string:	"   123"
// (note:  there is a NUL byte at the end of the string)
//
// Space characters are skipped wherever they occur.  Any other character
// that is not a decimal digit makes the function return 0.  An empty
// string also yields 0.  Overflow of unsigned long is not detected.
//									<<<<<
static __forceinline unsigned long
helper_atou_sram( __declspec(sram) char *p ) {
    __declspec(gp_reg) unsigned long value = 0;
    __declspec(gp_reg) int	     digit;

    for( ; *p != '\0'; p++ ) {
	if( *p == ' ' )	continue;
	digit = *p - '0';
	if( (digit<0) || (digit>9) )	return 0;	// not a digit
	value = 10*value + digit;
    }
    return value;
}

// Convert the low 32 bits of an unsigned long to hex ascii.
//									<<<<<
// hexstr receives exactly 8 lower-case hex digits, most significant digit
// first.  No NUL byte is written (there is no room for one in hexstr[8]).
// Every nibble, including the first one, is masked so that the output is
// the same whether or not unsigned long is wider than 32 bits.
static __forceinline void
helper_ul2hex_sram( unsigned long x, char hexstr[8] ) {
    unsigned int nibble;
    int		 shift;
    int		 i;

    shift = 28;				// start with bits 31..28
    for( i=0; i<8; i++ ) {
	nibble = (unsigned int)(x >> shift) & 0xf;
	if( nibble < 10 )	hexstr[i] = '0' + nibble;
	else			hexstr[i] = 'a' + nibble - 10;
	shift -= 4;
    }
}


// Set output to drop pkt.
// Thin wrapper:  calls onl_api_drop() and nothing else.
//
static __forceinline void
helper_set_out_to_DROP( ) {
    onl_api_drop();
}


// output debug msg with 2 args (string, unsigned long)
//	the c-string cstr can not be more than 28 bytes long
//
void
helper_sram_dbgmsg_str_1ul( char *cstr, unsigned long xval ) {
    __declspec(sram) char sram_dbgmsg_buf[28];
    __declspec(local_mem) char lmem_dbgmsg_buf[28];
    __declspec(gp_reg)	int n;

    n = strlen_sram( cstr );
    strcpy_sram( sram_dbgmsg_buf, cstr );
    sram_dbgmsg_buf[n] = ' ';
    sram_dbgmsg_buf[n+1] = '\0';
    ++n;
    helper_ultoa_sram( xval, sram_dbgmsg_buf+n, 28-n );

    memcpy_lmem_sram( lmem_dbgmsg_buf, (void *)sram_dbgmsg_buf, 28 );
    onl_api_debug_message( msgNextBlock, lmem_dbgmsg_buf );
}


// output debug msg with 1 string arg
// 	the c-string cstr can not be more than 28 bytes long
//
void
helper_sram_dbgmsg_str( char *cstr ) {
    __declspec(sram) char sram_dbgmsg_buf[28];
    __declspec(local_mem) char lmem_dbgmsg_buf[28];

    strncpy_sram( sram_dbgmsg_buf, cstr, 28 );
    memcpy_lmem_sram( lmem_dbgmsg_buf, (void *)sram_dbgmsg_buf, 28 );
    onl_api_debug_message( msgNextBlock, lmem_dbgmsg_buf );
}


// Output control msg with 1 unsigned long.
//	x0 is converted to decimal ascii in an sram buffer and 28 bytes of
//	that buffer are copied to outmsgstr, which must have room for them.
//
void
helper_sram_outmsg_1ul(	unsigned long x0,
			__declspec(local_mem) char *outmsgstr ) {
    __declspec(sram) char	line[28];

    helper_ultoa_sram( x0, line, 28 );
    memcpy_lmem_sram( outmsgstr, line, 28 );
}


// Output control msg with 2 unsigned longs in the form "x0 x1".
//	The text is built in an sram buffer and 28 bytes of that buffer are
//	copied to outmsgstr, which must have room for them.
//
void
helper_sram_outmsg_2ul(	unsigned long x0,
			unsigned long x1,
			__declspec(local_mem) char *outmsgstr ) {
    __declspec(sram) char	sep[2] = " ";
    __declspec(sram) char	line[28];
    __declspec(sram) char	field[16];

    // 1st value goes straight into the line
    helper_ultoa_sram( x0, line, 28 );

    // 2nd value is converted separately and appended after a SP
    strcat_sram( line, sep );
    helper_ultoa_sram( x1, field, 16 );
    strcat_sram( line, field );

    memcpy_lmem_sram( outmsgstr, line, 28 );
}


// Output control msg with 3 unsigned longs in the form "x0 x1 x2".
//	The text is built in a 28-byte sram buffer and 28 bytes of that
//	buffer are copied to outmsgstr, which must have room for them.
//
//	The message can hold 27 characters plus the NUL byte.  A value is
//	appended only if " <value>" still fits; the first value that does
//	not fit, and every value after it, is left out instead of being
//	written past the end of the buffer.
//
void
helper_sram_outmsg_3ul(	unsigned long x0,
			unsigned long x1,
			unsigned long x2,
			__declspec(local_mem) char *outmsgstr ) {
    __declspec(sram) char	SPACE[2] = " ";
    __declspec(sram) char	sram_msg_buf[28];
    __declspec(sram) char	sram_tmpstr[16];
    __declspec(gp_reg) int	len;	// chars in sram_msg_buf so far
    __declspec(gp_reg) int	nd;	// chars in sram_tmpstr

    len = helper_ultoa_sram( x0, sram_msg_buf, 28 );
    if( len < 0 )	len = 27;	// too big:  buffer was filled

    nd = helper_ultoa_sram( x1, sram_tmpstr, 16 );
    if( nd < 0 )	nd = 15;	// too big:  15 low-order digits kept
    if( (len + 1 + nd) <= 27 ) {
	strcat_sram( sram_msg_buf, SPACE );
	strcat_sram( sram_msg_buf, sram_tmpstr );
	len += 1 + nd;

	nd = helper_ultoa_sram( x2, sram_tmpstr, 16 );
	if( nd < 0 )	nd = 15;
	if( (len + 1 + nd) <= 27 ) {
	    strcat_sram( sram_msg_buf, SPACE );
	    strcat_sram( sram_msg_buf, sram_tmpstr );
	}
    }
    memcpy_lmem_sram( outmsgstr, sram_msg_buf, 28 );
}


// Output control msg with 5 unsigned longs in the form "x0 x1 x2 x3 x4".
//	The text is built in an sram buffer and 28 bytes of that buffer are
//	copied to outmsgstr, which must have room for them.
//
//	x1 .. x4 are each converted in a 4-byte scratch buffer, i.e. there
//	is room for 3 digits plus the NUL byte per value.  The return value
//	of helper_ultoa_sram() is not checked, so a larger value is not
//	reported as an error.
//
void
helper_sram_outmsg_5ul(	unsigned long x0,
			unsigned long x1,
			unsigned long x2,
			unsigned long x3,
			unsigned long x4,
			__declspec(local_mem) char *outmsgstr ) {
    __declspec(sram) char	sep[2] = " ";
    __declspec(sram) char	line[28];
    __declspec(sram) char	field[4];

    // 1st value goes straight into the line
    helper_ultoa_sram( x0, line, 28 );

    // each further value:  SP, convert into the scratch field, append
    strcat_sram( line, sep );
    helper_ultoa_sram( x1, field, 4 );
    strcat_sram( line, field );

    strcat_sram( line, sep );
    helper_ultoa_sram( x2, field, 4 );
    strcat_sram( line, field );

    strcat_sram( line, sep );
    helper_ultoa_sram( x3, field, 4 );
    strcat_sram( line, field );

    strcat_sram( line, sep );
    helper_ultoa_sram( x4, field, 4 );
    strcat_sram( line, field );

    memcpy_lmem_sram( outmsgstr, line, 28 );
}


// output debug msg with 1 uint args
//
void
helper_sram_dbgmsg_1ul(	unsigned long x ) {
    __declspec(local_mem) char lmem_dbgmsg_buf[28];

    helper_sram_outmsg_1ul( x, lmem_dbgmsg_buf );
    onl_api_debug_message( msgNextBlock, lmem_dbgmsg_buf );
}


// output debug msg with 3 uint args
//
void
helper_sram_dbgmsg_3ul(	unsigned long x0,
			unsigned long x1,
			unsigned long x2) {
    __declspec(local_mem) char lmem_dbgmsg_buf[28];

    helper_sram_outmsg_3ul( x0, x1, x2, lmem_dbgmsg_buf );
    onl_api_debug_message( msgNextBlock, lmem_dbgmsg_buf );
}


// Set output to DO_NOTHING.
// Only assigns DO_NOTHING to dlNextBlock; nothing else is changed here.
//
static __forceinline void
helper_set_out_to_DO_NOTHING( ) {
    dlNextBlock = DO_NOTHING;
}


// Set ring_out data from ring_in data as the default for nextBlk.
// 	Return 0 if OK; -1 if nextBlk is none of the handled values.
// 	dlNextBlock is set to nextBlk in every case, including the -1 case.
//
// 	Handled values:  QM, DROP, MUX, PACKET_IN_RING_0 .. PACKET_IN_RING_4
// 	and DO_NOTHING (for which nothing besides dlNextBlock is touched).
// 	TX and XSCALE are ignored for now.
//
// Note:  Maybe we can just do word copying since all we are doing is
// 		copying from input ring to output ring and we know
// 		that the message is 6 words.  Then, modify the rightmost
// 		3 bits of the uc_mc_bits to reflect the proper destination
// 		plugin ME ... right now, this field is never examined by
// 		the plugin framework.
//
static __forceinline int
helper_set_meta_default( __declspec(gp_reg) int nextBlk ) {
    __declspec(gp_reg) int	out_port;

    dlNextBlock = nextBlk;

    if( nextBlk == QM ) {
	// output port is bits 5..3 of uc_mc_bits
	out_port = (ring_in.uc_mc_bits >> 3) & 0x7;
	onl_api_update_ring_out_to_qm(
		ring_in.buf_handle_lo24,
		out_port,
		(((out_port+1) << 13) | ring_in.qid),
		ring_in.l3_pkt_len);
	return 0;
    }

    if( nextBlk == DROP ) {
	onl_api_update_ring_out_to_freelist( ring_in.buf_handle_lo24 );
	return 0;
    }

    if( nextBlk == MUX ) {
	onl_api_update_ring_out_to_mux(
		ring_in.buf_handle_lo24,
		(ring_in.uc_mc_bits >> 3) & 0x7,
		ring_in.in_port,
		ring_in.plugin_tag,
		ring_in.stats_index,
		0,
		ring_in.qid,
		ring_in.l3_pkt_len);
	return 0;
    }

    if(	(nextBlk == PACKET_IN_RING_0)	||
	(nextBlk == PACKET_IN_RING_1)	||
	(nextBlk == PACKET_IN_RING_2)	||
	(nextBlk == PACKET_IN_RING_3)	||
	(nextBlk == PACKET_IN_RING_4)		) {
	onl_api_update_ring_out_to_plugin(
		ring_in.buf_handle_lo24,
		(ring_in.uc_mc_bits >> 3) & 0x7,
		ring_in.in_port,
		ring_in.plugin_tag,
		ring_in.stats_index,
		0,
		ring_in.qid,
		ring_in.nh_eth_daddr_hi32,
		ring_in.nh_eth_daddr_lo16,
		ring_in.eth_type,
		ring_in.uc_mc_bits,
		ring_in.l3_pkt_len);
	return 0;
    }

    if( nextBlk == DO_NOTHING )	return 0;	// nothing to set up

    return -1;					// all other options
}


// Set plugin tag field in outgoing meta-pkt to 'tag'.
// The field written is ring_out.plugin_mux_data_out.plugin_tag.
//
static __forceinline void
helper_set_meta_mux_tag( __declspec(gp_reg) int tag ) {
    ring_out.plugin_mux_data_out.plugin_tag = tag;
}


// Increment plugin tag field in outgoing meta-pkt by one.
// The field changed is ring_out.plugin_mux_data_out.plugin_tag.
//
static __forceinline void
helper_inc_meta_mux_tag( void ) {
    ++ring_out.plugin_mux_data_out.plugin_tag;
}


//									<<<<<
// Find the next word, starting the search at p, where a word is any char
// sequence terminated by the space character or by the end of the string,
// and terminate the word with the NUL byte.
// Return 0 if no word found (only spaces, or nothing at all, before the
// end of the string).
//
// Used in processing control messages.
//
// For example, if cstr = "  234   89 1  " (note: character 14 has NUL byte),
// the string becomes "  234\0  89 1  " and the returned pointer points
// to '2'.
//
// The scan never reads or writes beyond the NUL byte that ends the string.
// It starts at p itself, so passing a pointer to a word that was already
// returned yields that same word again.  To get the following word, call
// again with the address of the character after the NUL byte that ended
// the previous word, and only if that address is still inside the message.
//
// CAVEAT 1:  The string must be a c-string; i.e., have the terminating
// 		NUL byte.
// CAVEAT 2:  A TAB character will be treated as part of a word which is
// 		probably not the behavior you want!
//
char *
helper_tokenize( char *p ) {
    char	*word;

    // skip past leading SP characters
    while( *p == ' ' )	p++;
    if( *p == '\0' )	return 0;		// no word before the end

    word = p;

    // find first SP or NUL after end of word
    while( (*p != ' ') && (*p != '\0') )	p++;

    // a SP becomes the terminator; a NUL already is one
    if( *p == ' ' )	*p = '\0';

    return word;
}


// Count the number of words in a control message.
//
//	A word ends where the previous character was not a space and the
//	current character is either a space or '\0'.
//
//	At most 28 characters are examined and the scan stops at the first
//	'\0', so nothing behind the terminator is read or counted.  A word
//	that is still unfinished after 28 characters is not counted.
//
// CAVEAT:  Doesn't handle TAB character!
//
unsigned int
helper_count_words( char *p ) {
    char		prev;		// previous character
    char		cur;		// current character
    unsigned int	nwords;
    unsigned int	i;

    prev = ' ';
    nwords = 0;
    for( i=0; i<28; i++ ) {
	cur = p[i];
	if( (prev != ' ') && ((cur == ' ') || (cur == '\0')) )	++nwords;
	if( cur == '\0' )	break;	// end of message
	prev = cur;
    }

    return nwords;
}

