/*
 * Copyright (c) 1994-2009, ITT Visual Information Solutions. All
 * rights reserved. This software includes information which is
 * proprietary to and a trade secret of ITT Visual Information Solutions.
 * It is not to be disclosed to anyone outside of this organization.
 * Reproduction by any means whatsoever is prohibited without express
 * written permission.
 */


header {
/*
 * Copyright (c) 1994-2009, ITT Visual Information Solutions. All
 * rights reserved. This software includes information which is
 * proprietary to and a trade secret of ITT Visual Information Solutions.
 * It is not to be disclosed to anyone outside of this organization.
 * Reproduction by any means whatsoever is prohibited without express
 * written permission.
 */

package com.rsi.idldt.core.internal.parser;
}


/////////////////////////////////////////////////
// IDL Parser
//
// The parser has been augmented with a new constructor that saves the lexer and
// compilation unit.  The lexer is needed so that token offsets can be determined.
// The compilation unit is needed for the creation of new routines.
//
// The offsets are acquired from the lexer.  The lexer normally just knows the line
// number and the column, but its consume() and newline() methods have been
// overridden in order to update variables that the parser can access.
//
// Observations:
//
// - The rules "caseStatement" and "switchStatement" don't try to match ELSE.  This
//   probably speeds up parsing.
//
// - The rule "miscStatement" matches things we don't care about, such as assignments,
//   procedure calls, etc.  To properly match, it needs to know what those types of
//   statements can begin with.  It also needs to know when to end.
//
// - Statements don't have to end with a new line.  For example:
//      if ( b ) then s1 else s2  ; comment
//   The NEWLINE rule is used very carefully.

{
import com.rsi.idldt.core.dom.IIDLCompilationUnit;
import com.rsi.idldt.core.dom.IIDLRoutine;
import com.rsi.idldt.core.dom.IIDLStructure;
import com.rsi.idldt.core.utils.Tracer;
}

// Declares the parser class; the options below apply to every parser rule.
class IDLParser extends Parser;

options
{
  // number of tokens of lookahead used by the parser
  k = 2;

  // from the Standard C example on the ANTLR website (apparently also for Java)
  codeGenMakeSwitchThreshold = 2;
  codeGenBitsetTestThreshold = 3;
}
        
{
  /* Lexer feeding this parser; asked for the offset of each token of interest. */
  private IDLLexer lexer;

  /* Compilation unit on which routines and structures are created. */
  private IIDLCompilationUnit compilationUnit;


  /* Creates an IDLParser and saves the lexer and compilation unit.
   *
   * The lexer is needed so that token offsets can be determined.  The compilation
   * unit is needed for the creation of new routines.
   */
  public IDLParser( IDLLexer l, IIDLCompilationUnit cu )
  {
    this( l );

    this.lexer = l;
    this.compilationUnit = cu;
  }

  /* The error handlers are overridden so that every message goes through our
   * tracer.  Messages are prefixed with the file name when one is set.
   */

  public void reportError( RecognitionException ex )
  {
    Tracer.trace( String.valueOf( ex ) );
  }

  public void reportError( String s )
  {
    final String file = getFilename();
    Tracer.trace( ( file == null ) ? "error: " + s : file + ": error: " + s );
  }

  public void reportWarning( String s )
  {
    final String file = getFilename();
    Tracer.trace( ( file == null ) ? "warning: " + s : file + ": warning: " + s );
  }

}

// Start rule: any mix of routine definitions, statements and blank lines,
// optionally followed by a final END (and trailing new lines) before EOF.
compilationUnit
//	{ long t0 = System.currentTimeMillis(); }
	:	( routineDefinition | statement | NEWLINE )* ( t:END ( NEWLINE )* )? EOF
		{
		  // The optional END matched here is what flags the unit as having
		  // a main program.
		  final boolean sawTrailingEnd = ( t != null );
		  if ( sawTrailingEnd )
		  {
		    compilationUnit.setHasMainProg( true );
		  }

//		  long t1 = System.currentTimeMillis();
//		  System.out.println( "Elapsed parse time: " + ((t1 - t0) / 1000.0) + " seconds" );
		}
	;

// Matches one routine: PRO or FUNCTION, the routine name, the parameter list,
// a NEWLINE, the body statements and the closing END.
//
// A routine is created on the compilation unit as soon as the name is known;
// its length is passed as -1 at that point and set once END has been matched.
routineDefinition
	{
	  int routineOffset = 0;
	  StringBuffer id = new StringBuffer( 32 );
	  int idOffset = 0;
	  IIDLRoutine r = null;
	}
	:	( t1:PRO | t2:FUNC )
		{
		  // exactly one of t1/t2 is set; the routine starts at that keyword
		  routineOffset = lexer.getOffset( (t1 != null) ? t1 : t2 );
		}
		idOffset = routineIdentifer[ id ]
		{
		  // second argument is true when the FUNCTION keyword was matched
		  r = compilationUnit.createRoutine( id.toString(), (t2 != null),
		    routineOffset, -1,
		    idOffset, id.length() );

//		  System.out.println( (t1 != null) ? "Procedure:" : "Function:" );
//		  System.out.println( "  Name:      " + id +
//		    " @" + idOffset + " +" + id.length() );
		}
		parameterList[ r ] NEWLINE
		statementList
		e:END
		{
		  // The "parameter" rule tolerates a null routine, so do the same here
		  // instead of failing with a NullPointerException.
		  if ( r != null )
		  {
		    // length runs from the PRO/FUNCTION keyword through the end of END
		    r.setLength( lexer.getOffset( e ) + e.getText().length() - routineOffset );
		  }

//		  System.out.println( "  Routine:   " +
//		    " @" + routineOffset +
//		    " +" + (lexer.getOffset( e ) + e.getText().length() - routineOffset) );
		}
	;
	
// Matches a routine name, either "name" or "class::method".
//
// The name text is appended to the caller-supplied buffer "id"; the returned
// value is the offset of the first identifier.
routineIdentifer [StringBuffer id] returns [int idOffset = 0]
	:	id1:IDENTIFIER ( COLON COLON id2:IDENTIFIER )?
		{
		  idOffset = lexer.getOffset( id1 );

		  id.append( id1.getText() );
		  if ( id2 != null )
		  {
		    // qualified form: add the separator and the second identifier
		    id.append( "::" ).append( id2.getText() );
		  }
		}
	;
	
// Zero or more parameters, each introduced by a comma (the first parameter
// follows a comma after the routine name).  The routine "r" is passed through
// to each parameter.
parameterList [IIDLRoutine r]
	:	( COMMA parameter[ r ] )*
	;

// Matches one formal parameter: either a plain "name" (positional parameter)
// or "name = variable" (keyword).  The result is recorded on routine "r";
// nothing is recorded when "r" is null.
parameter [IIDLRoutine r]
	:	param:IDENTIFIER ( EQUAL var:IDENTIFIER )?
		{
		  if ( r != null )
		  {
		    final String name = param.getText();
		    final int nameOffset = lexer.getOffset( param );
		    final int nameLength = name.length();

		    if ( var != null )
		    {
		      // keyword: the full span runs from the start of the name to the
		      // end of the variable, the name span covers just the name
		      final String variable = var.getText();
		      final int fullLength =
		        lexer.getOffset( var ) + var.getText().length() - nameOffset;

		      r.createKeyword( name, variable,
		        nameOffset, fullLength,
		        nameOffset, nameLength );
		    }
		    else
		    {
		      // positional parameter: full span and name span are the same
		      r.createParameter( name,
		        nameOffset, nameLength,
		        nameOffset, nameLength );
		    }

//			if ( var == null )
//			  System.out.println( "  Parameter: " + id +
//			    " @" + paramOffset +
//			    " +" + paramLength );
//			else
//			  System.out.println( "  Keyword:   " + id + " = " + var.getText() +
//			    " @" + paramOffset +
//			    " +" + (lexer.getOffset( var ) + var.getText().length() - paramOffset) + "," + paramLength );
		  }
		}
	;
	
// Any number of statements and blank lines; used for routine bodies and for
// the contents of BEGIN blocks.
statementList
	:	( statement | NEWLINE )*
	;

// A single statement.  The first alternative handles "label:" optionally
// followed by a statement; a syntactic predicate is used to tell a label
// apart from the other alternatives.  The control-flow statements each have
// their own rule, and anything else falls through to miscStatement.
statement
	:	( IDENTIFIER COLON ) => IDENTIFIER COLON
		( options { warnWhenFollowAmbig = false; } : statement )? // labeled statement
	|	ifStatement
	|	forStatement
	|	whileStatement
	|	repeatStatement
	|	caseStatement
	|	switchStatement
	|	beginStatement
	|	fowardStatement
	|	miscStatement			// assignment statements, procedure calls, etc.
	;

// IF ... THEN ... [ELSE ...].
//
// The condition is not parsed: every token up to THEN is skipped.  The THEN
// part is either a BEGIN block closed by ENDIF or END, or a single statement.
// The optional ELSE part is either a BEGIN block closed by ENDELSE or END, or
// a single statement.  The ambiguity warning for the optional ELSE
// (dangling else) is switched off.
ifStatement
	:	IF
		( ~( THEN ) )*
		THEN
		( ( BEGIN ) => ( BEGIN statementList ( ENDIF | END ) )
		| statement
		)
		( options { warnWhenFollowAmbig = false; } :
		  ELSE
		  ( ( BEGIN ) => ( BEGIN statementList ( ENDELSE | END ) )
		  | statement
		  )
		)?
	;

// FOR ... DO body.  Everything between FOR and DO is skipped; the body is
// either a BEGIN block closed by END or ENDFOR, or a single statement.
forStatement
	:	FOR ( ~( DO ) )* DO
		( ( BEGIN ) => ( BEGIN statementList ( END | ENDFOR ) ) | statement )
	;
	
// WHILE ... DO body.  Everything between WHILE and DO is skipped; the body is
// either a BEGIN block closed by END or ENDWHILE, or a single statement.
whileStatement
	:	WHILE ( ~( DO ) )* DO
		( ( BEGIN ) => ( BEGIN statementList ( END | ENDWHILE ) ) | statement )
	;
	
// REPEAT body UNTIL.  The body is either a BEGIN block closed by END or
// ENDREP, or a single statement.  After UNTIL, a parenthesized expression is
// consumed via matchPair when the next token is LPAREN; otherwise nothing
// more is matched by this rule.
repeatStatement
	:	REPEAT
		( ( BEGIN ) => ( BEGIN statementList ( END | ENDREP ) ) | statement )
		UNTIL ( ( LPAREN ) => matchPair | )
	;
	
// CASE ... OF followed by a new line, then any number of blank lines or
// "label: statement" entries (the statement may be replaced by a new line),
// closed by END or ENDCASE.  The selector expression between CASE and OF is
// skipped.
caseStatement
	:	CASE ( ~( OF ) )* OF NEWLINE
		( ( NEWLINE ) => NEWLINE
		| caseLabel COLON ( statement | NEWLINE )
		)*
		( END | ENDCASE )
	;
	
// SWITCH ... OF followed by a new line, then any number of blank lines or
// "label: statement" entries (the statement may be replaced by a new line),
// closed by END or ENDSWITCH.  The selector expression between SWITCH and OF
// is skipped.
switchStatement
	:	SWITCH ( ~( OF ) )* OF NEWLINE
		( ( NEWLINE ) => NEWLINE
		| caseLabel COLON ( statement | NEWLINE )
		)*
		( END | ENDSWITCH )
	;

// A free-standing BEGIN block.  Any of the END-style keywords is accepted as
// the terminator.
beginStatement
	:	BEGIN
		statementList
		( END | ENDIF | ENDELSE | ENDFOR | ENDWHILE | ENDREP | ENDCASE | ENDSWITCH )
	;
	
// FORWARD_FUNCTION declaration: the keyword followed by one or more
// comma-separated identifiers.
fowardStatement
	:	FORWARD IDENTIFIER ( options { greedy = true; } : COMMA IDENTIFIER )*
	;

// Catch-all for statements whose structure is of no interest (assignments,
// procedure calls, etc.).
//
// The statement must begin with IDENTIFIER, LPAREN, PLUS or MINUS.  After
// that the rule greedily consumes constants, structure definitions (so that
// structures inside such statements are still recorded) and any other token
// that is not NEWLINE, LBRACE or one of the control-flow keywords listed
// below, which mark where the statement ends.
//
// The labels t1..t4 and the local "structName" are not read anywhere in this
// rule.
miscStatement
	{ String structName = null; }
	:	( t1:IDENTIFIER | t2:LPAREN | t3:PLUS | t4:MINUS )
		( options { greedy = true; } :
		  ( CONSTANT ) => CONSTANT
		| structName = structureDefinition
		| ~( NEWLINE | LBRACE |
		     BEGIN | IF | FOR | WHILE | REPEAT | CASE | SWITCH |
		     END | ENDIF | ELSE | ENDELSE | ENDFOR | ENDWHILE |
		     ENDREP | UNTIL | ENDCASE | ENDSWITCH )
		)*
	;

// The label of a CASE/SWITCH entry: every token up to (not including) the
// COLON.  END, ENDCASE and ENDSWITCH are excluded so that the label cannot
// run past the end of the enclosing statement.
caseLabel
	:	( ~( COLON | END | ENDCASE | ENDSWITCH ) )*
	;

// Matches a brace-delimited structure expression, "{" ... "}", and returns
// the structure name (null when none could be determined).
//
// Four forms are distinguished by syntactic predicates:
//   {name}                        reference to a named structure
//   {name, field: ...} or
//   {name, inherits ...}          definition of a named structure
//   {field: ...}                  definition of an anonymous structure
//   anything else                 tokens are consumed and nothing is recorded
//
// For the two definition forms a structure is created on the compilation
// unit with length -1; the length is set once the closing brace is matched.
structureDefinition returns [String structName = null]
	{
	  IIDLStructure s = null;
	}
	:	t1:LBRACE
		( ( IDENTIFIER RBRACE ) => id1:IDENTIFIER
		  {
		    structName = id1.getText();

//		    System.out.println( "  Structure Reference:  " + structName );
		  }
		| ( IDENTIFIER COMMA ( ( IDENTIFIER COLON ) | INHERITS ) ) => id2:IDENTIFIER
		  {
		    structName = id2.getText();
		    s = compilationUnit.createStructure( structName,
		      lexer.getOffset( t1 ), -1,
		      lexer.getOffset( id2 ), structName.length() );

//		    System.out.println( "  Structure Definition: " + structName );
		  }
		  ( COMMA structureElement[ s ] )*
		| ( IDENTIFIER COLON ) =>		// anonymous structure definition
		  {
		    // no name in the source: the structure supplies its own
		    s = compilationUnit.createStructure( null,
		      lexer.getOffset( t1 ), -1,
		      0, -1 );
		    structName = s.getElementName();

//		    System.out.println( "  Anonymous Definition: " + structName );
		  }
		  ( structureElement[ s ] ( COMMA structureElement[ s ] )* )
		| ( ~( LBRACE | RBRACE ) )*		// not a structure definition... eat tokens
//		  {
//		    System.out.println( "  Assignment to Structure" );
//		  }
		)
		t2:RBRACE
		{
		  // only the definition forms created a structure to finish off
		  if ( s != null )
		  {
		    final int start = lexer.getOffset( t1 );
		    final int end = lexer.getOffset( t2 ) + t2.getText().length();
		    s.setLength( end - start );

//		    System.out.println( "    @" + start + " +" + (end - start) );
		  }
		}
	;

// Matches one element of a structure definition and records it as a field of
// structure "s".  Three forms are recognized:
//   inherits name      pseudo-field naming the inherited structure
//   name: { ... }      field whose value is a structure
//   name: <anything>   field whose value is consumed up to the next comma or
//                      closing brace (nested pairs and structures included)
structureElement [IIDLStructure s]
	{
	  String structName = null;
	}
	:	( ( INHERITS ) => t0:INHERITS t1:IDENTIFIER
		| ( IDENTIFIER COLON LBRACE ) => t2:IDENTIFIER COLON
		  structName = structureDefinition
		| ( IDENTIFIER COLON ~(LBRACE) ) => t3:IDENTIFIER COLON
		  ( matchPair
		  | structName = structureDefinition
		  | ~( COMMA | LBRACE | RBRACE | LPAREN | RPAREN | LBRACKET | RBRACKET )
		  )*
		)
		{
		  String fieldName = null;
		  boolean isInherit = false;
		  int offset = 0, length = 0;
		  int nameOffset = 0, nameLength = 0;

		  // t2: field represents a structure, whose name can be determined
		  // t3: field represents something indeterminate (e.g. "id:(b?{Fu}:{Bar})")
		  // Both are recorded the same way, so pick whichever one matched.
		  final Token named = ( t2 != null ) ? t2 : t3;

		  if ( t1 != null )
		  {
		    // field is a pseudo-field... fields to be determined when needed
		    isInherit = true;		// inherits implies anonymous field (no name)
		    structName = t1.getText();

		    nameOffset = lexer.getOffset( t1 );
		    nameLength = t1.getText().length();

		    // the element spans from the INHERITS keyword to the end of the name
		    offset = lexer.getOffset( t0 );
		    length = ( nameOffset + nameLength ) - offset;
		  }
		  else if ( named != null )
		  {
		    fieldName = named.getText();

		    nameOffset = lexer.getOffset( named );
		    nameLength = named.getText().length();

		    // the element span is just the field name
		    offset = nameOffset;
		    length = nameLength;
		  }

		  s.createField( fieldName, "", isInherit, structName,
		    offset, length, nameOffset, nameLength );

//		  if ( t1 != null )
//		    System.out.println( "    Inherits: " + t1.getText() );
//		  else if ( t2 != null )
//		    System.out.println( "    Element:  " + t2.getText() );
//		  else if ( t3 != null )
//		    System.out.println( "    Element:  " + t3.getText() );
		}
	;

// Consumes a balanced "( ... )" or "[ ... ]" group.  Nested groups are
// handled by recursion, and structure definitions inside the group are still
// parsed (so they get recorded); the structure name returned into "s" is not
// used.
matchPair
	{ String s; }
	:	LPAREN
		( ~( LPAREN | RPAREN | LBRACKET | RBRACKET | LBRACE )
		| s = structureDefinition
		| matchPair
		)*
		RPAREN
	|	LBRACKET
		( ~( LPAREN | RPAREN | LBRACKET | RBRACKET | LBRACE )
		| s = structureDefinition
		| matchPair
		)*
		RBRACKET
	;
	
/////////////////////////////////////////////////
// IDL Lexical Analyzer
//
// Keywords are first matched to IDENTIFIER, but since that rule tests literals
// the literals (keywords) take precedence.  For this to work, the keywords
// must be specified in a "tokens" block.
//
// We might be able to optimize by better use of the "filter" option.  It allows
// characters to be skipped if they do not comprise a lexer token.  Incidentally,
// this is how we get by without having to specify the full alphabet (neither
// ASCII nor Unicode!).

{
import java.lang.Integer;
import java.util.Vector;
}

// Declares the lexer class; the options below apply to every lexer rule
// unless a rule overrides them.
class IDLLexer extends Lexer;

options {
	// characters the lexer may encounter
	charVocabulary = '\3'..'\377' | '\u1000'..'\u1fff';
	// skip characters that do not form any token instead of reporting them
	filter = true;
	
	// number of characters of lookahead
	k = 2;
	// IDL is case-insensitive, for both ordinary text and keyword literals
	caseSensitive = false;
	caseSensitiveLiterals = false;
	// literal (keyword) lookup is off by default; IDENTIFIER turns it on
	testLiterals = false;
}

// Keyword literals.  These are matched by the IDENTIFIER rule first and then
// turned into the token types below by the literals test.
tokens {
  // keywords that are scanned for
  FUNC		= "function";
  PRO		= "pro";
  END		= "end";
  
  BEGIN		= "begin";
  DO			= "do";
  
  IF			= "if";
  THEN		= "then";
  ENDIF		= "endif";
  ELSE		= "else";
  ENDELSE	= "endelse";

  FOR		= "for";
  ENDFOR		= "endfor";
  
  WHILE		= "while";
  ENDWHILE	= "endwhile";
  
  REPEAT		= "repeat";
  ENDREP		= "endrep";
  UNTIL		= "until";
  
  CASE		= "case";
  ENDCASE	= "endcase";
  SWITCH		= "switch";
  ENDSWITCH	= "endswitch";
  OF			= "of";
  
  INHERITS	= "inherits";
  
  FORWARD	= "forward_function";
}

{
  // index of the line currently being scanned (0 for the first line)
  private int currentLine = 0;
  // number of characters counted so far on the current line
  private int currentColumn = 0;
  // cumulative character counts: entry n covers lines 0..n inclusive
  private Vector lineLength = new Vector( 1024, 1024 );

  /* Creates a lexer reading from the given stream.
   *
   * NOTE(review): the tabSize argument is not used; the tab size is always
   * set to 1, presumably so that a tab advances the column by exactly one
   * character and columns can be turned into offsets -- confirm.
   */
  public IDLLexer( InputStream in, int tabSize )
  {
    this( in );
    setTabSize( 1 );
  }

  /* Consumes one character and counts it towards the current line length. */
  public void consume() throws antlr.CharStreamException
  {
    final char c = LA(1);
    super.consume();

    // The column count is used to keep track of line lengths, which in turn
    // map (line,column) to offsets.  Lexical predicates (such as in the NL
    // rule) can cause consume() to be called twice for the same character,
    // so nothing is counted while the lexer is just guessing.
    if ( inputState.guessing != 0 )
      return;

    // Characters in the range 0x80..0xBF are not counted, in order to filter
    // out extra UTF8 bytes.
    final boolean extraUtf8Byte = ( c >= 0x80 && c <= 0xBF );
    if ( !extraUtf8Byte )
      currentColumn++;
  }

  /* Records the length of the line just completed and starts a new one. */
  public void newline()
  {
    super.newline();

    // Line lengths are stored cumulatively so that offsets from the
    // beginning of the file are computed faster.
    int total = currentColumn;
    if ( currentLine > 0 )
    {
      final Integer previous = (Integer) lineLength.elementAt( currentLine - 1 );
      total += previous.intValue();
    }
    lineLength.add( new Integer( total ) );

    // update our internal counts
    currentColumn = 0;
    currentLine++;
  }

  /* Returns the zero-based offset of token t from the beginning of the input,
   * computed from the token's line and column and the recorded line lengths.
   */
  public int getOffset( Token t )
  {
    final int lineIndex = t.getLine() - 1;	// convert to zero-based index
    int offset = t.getColumn() - 1;		// DOM wants zero-based offsets

    // add the length of everything before the token's line
    if ( lineIndex > 0 )
      offset += ((Integer) lineLength.elementAt( lineIndex - 1 )).intValue();

    return offset;
  }
}

// Single-character punctuation tokens that are passed on to the parser.
COLON:		':';
COMMA:		',';
PERIOD:		'.';
EQUAL:		'=';
DOLLAR:		'$';
LPAREN:		'(';
RPAREN:		')';
LBRACKET:	'[';
RBRACKET:	']';
LBRACE:		'{';
RBRACE:		'}';

// Helper rules.  Being "protected", they are only used as building blocks by
// other lexer rules.
// NOTE(review): the parser rule miscStatement lists PLUS and MINUS as possible
// first tokens; since both are protected here, presumably the lexer never
// hands them to the parser on their own -- confirm whether that is intended.
protected SQUOTE:	'\'';
protected DQUOTE:	'"';
protected MINUS:		'-';
protected PLUS:		'+';

// characters that may appear in identifiers (input is case-insensitive)
protected ALPHA:		'a'..'z';
protected UNDER:		"_";
protected BANG:		"!";

protected OCTAL_DIGIT:	'0'..'7';
protected DIGIT:			'0'..'9';

	
// Identifiers and keywords.  An identifier starts with a letter, underscore
// or "!" and continues with letters, digits, underscores, "!" or "$".  The
// matched text is checked against the literals of the tokens block, which is
// how keywords get their own token types.
IDENTIFIER
	options { testLiterals = true; }	// allows keywords to take precedence
	:	( ALPHA | UNDER | BANG ) ( ALPHA | DIGIT | UNDER | BANG | DOLLAR )*
	;

// Quoted constants.  A double quote immediately followed by an octal digit is
// taken as an octal number; everything else beginning with a quote is a
// string.
CONSTANT
	:	( DQUOTE OCTAL_DIGIT ) => OCTAL_NUMBER
	|	STRING
	;

// A single- or double-quoted string.  A doubled quote character inside the
// string stands for the quote itself.  The string never extends past the end
// of the line, and the closing quote is optional.
protected
STRING
	:	SQUOTE ( ~( '\'' | '\r' | '\n' ) | ( SQUOTE SQUOTE ) )* ( SQUOTE )?
	|	DQUOTE ( ~( '"' | '\r' | '\n' ) | ( DQUOTE DQUOTE ) )* ( DQUOTE )?
	;

// An octal number: a double quote followed by one or more octal digits (there
// is no closing quote).
protected	
OCTAL_NUMBER
	:	DQUOTE ( OCTAL_DIGIT )+
	;

// A plus or minus sign.  No other rule visible in this grammar refers to SIGN.
protected
SIGN
	:	( PLUS | MINUS )
	;
	
// A line starting at "@" (batch file inclusion): everything up to the end of
// the line is skipped.
BATCH
	:	'@' ( ~( '\r' | '\n' ) )*
		{ $setType(Token.SKIP); }
	;

// Line continuation: "$" followed by the rest of the line, then one or more
// groups of (optional comment, line break, leading whitespace).  The whole
// match is skipped, so the parser sees a continued statement as one line.
LINE_CONTINUATION
	:	'$' ( ~( '\r' | '\n' | ';' ) )* ( ( COMMENT )? NL ( WS )* )+
		{
		  $setType(Token.SKIP); // comments and new lines are skipped until statement
		}
	;

// A comment runs from ";" to the end of the line and is skipped.  The line
// break itself is not part of the comment.
COMMENT
	:	';' ( ~( '\r' | '\n' ) )*
		{ $setType(Token.SKIP); }
	;
	
// The "&&" operator.  It is matched explicitly (and skipped) so that it is
// not mistaken for two "&" statement separators.
LOGICAL_AND
	:	'&' '&'							// to distinguish from NEWLINE
		{ $setType(Token.SKIP); }
	;

// A physical line break in any of the three conventions.  A syntactic
// predicate makes sure "\r\n" is taken as a single line break.  newline() is
// called once per line break, which updates the line bookkeeping.
protected
NL
	:	( ( '\r' '\n' ) => '\r' '\n'		// Windows
		| '\r'							// Macintosh
		| '\n'							// UNIX
		)
		{ newline(); }
	;
		
// Statement terminator seen by the parser: either a physical line break or
// the "&" statement separator.
NEWLINE
	:	NL |	'&'							// full deal with statement separator
	;
		
// Blanks and tabs are skipped.
WS
	:	( ' ' | '\t' )
		{ $setType(Token.SKIP); }
	;