/* Scanner program */

#include <fstream.h>
#include <ctype.h>
#include <stdio.h>

/*
 * Token kinds returned by scanner().
 *
 *   lessop .. divop       operators:  <  <=  =  >  >=  :=  +  -  *  /
 *   lparen .. colon       punctuation:  (  )  ;  ,  :
 *   scaneof               end of the input stream
 *   begin .. real,        reserved words; check_reserved() maps the spelling
 *   read .. notop         "begin", "end", "if", "then", "else", "main",
 *                         "loop", "exit", "return", "int", "real", "read",
 *                         "write", "and", "or", "not" to these values
 *   intliteral            integer literal (value stored in int_val)
 *   realliteral           real literal (value stored in real_val)
 *   strliteral            string literal (text stored in str_buffer)
 *   id                    any other identifier (text left in token_buffer)
 */
typedef enum
{
  lessop, lesseqop, eqop, greatop, greateqop, assignop, plusop, minusop,
  timesop, divop, lparen, rparen, semicolon, comma, colon, scaneof,
  begin, end, IF, then, ELSE, MAIN, loop, exit, RETURN, INT, real,
  intliteral, realliteral, strliteral, read, write, andop, orop, notop, id
} token;

// Maximum number of characters kept for an identifier or numeric token;
// buffer_char() drops anything beyond this limit.
#define buf_size 30
// Text of the identifier/number being scanned (+1 for the terminating 0
// that scanner() writes once the token is complete).
char token_buffer[buf_size+1];
// Number of characters currently stored in token_buffer.
int buf_index;
// Value of the most recent integer literal (filled in by scanner()).
int int_val;
// Value of the most recent real literal (filled in by scanner()).
double real_val;

// Maximum number of characters kept for a string literal; longer strings
// are truncated by scanner().
#define str_size 80
// Contents of the most recent string literal, without the enclosing quotes
// (+1 for the terminating 0).
char str_buffer[str_size+1];
// Number of characters currently stored in str_buffer.
int str_index;

/*
 * Append c to token_buffer and advance buf_index.
 * Once buf_size characters are stored, further characters are discarded,
 * so over-long tokens are truncated rather than overflowing the buffer.
 */
void buffer_char(char c)
{
  if (buf_index >= buf_size)
    return; // buffer full: drop the character

  token_buffer[buf_index] = c;
  buf_index++;
}

/*
 * Classify the 0-terminated text in token_buffer.
 * Returns the matching reserved-word token when the text is exactly one of
 * the reserved spellings (comparison is case-sensitive), otherwise id.
 */
token check_reserved()
{
  // Reserved spellings paired with the token each one maps to.
  static const struct { const char *spelling; token kind; } reserved[] =
  {
    { "begin",  begin  },
    { "end",    end    },
    { "if",     IF     },
    { "then",   then   },
    { "else",   ELSE   },
    { "main",   MAIN   },
    { "loop",   loop   },
    { "exit",   exit   },
    { "return", RETURN },
    { "int",    INT    },
    { "real",   real   },
    { "read",   read   },
    { "write",  write  },
    { "and",    andop  },
    { "or",     orop   },
    { "not",    notop  }
  };
  const int count = sizeof(reserved) / sizeof(reserved[0]);
  int k;

  for (k = 0; k < count; k++)
  {
    if (strcmp(token_buffer, reserved[k].spelling) == 0)
      return reserved[k].kind;
  }
  return id; // not a reserved word
}

void lexical_error(char c)
{
  cout << "lexical error when reading character " << c << "ascii code ";
  cout << (int)c << endl;
}

/*
 * Read and return the next token from *fin.
 *
 * White space and "--" comments (which run to the end of the line) are
 * skipped.  Side effects by token kind:
 *   id / reserved word  text is left 0-terminated in token_buffer
 *   intliteral          text in token_buffer, value in int_val
 *   realliteral         text in token_buffer, value in real_val
 *   strliteral          contents (without the enclosing quotes, with each
 *                       doubled quote "" reduced to one ") in str_buffer
 * Returns scaneof when no more input can be read.
 *
 * Characters that cannot start a token, and a number followed by '.' but
 * no digit, are reported through lexical_error() and skipped; scanning
 * then continues with the following input.  A string literal that is
 * still open at end of input is reported through lexical_error() (naming
 * the quote character) and returned with the text collected so far.
 */
token scanner(ifstream * fin)
{
  // Kept as int, not char: get()/peek() return EOF out of band, and the
  // <ctype.h> classifiers are only defined for unsigned-char values and EOF.
  int in_char, c;
  buf_index = 0;

  while(1)
  {
    in_char = fin -> get();
    // get() yields EOF both at end of file and when the stream has failed,
    // so this also stops on a bad stream instead of spinning.
    if (in_char == EOF) return scaneof;
    if (isspace(in_char))
    {
      continue;
    }
    else if (isalpha(in_char))
    { // found identifier
      buffer_char((char)in_char);
      for (c=fin->peek();isalnum(c) || c=='_';c=fin->peek())
        buffer_char((char)fin -> get());
      token_buffer[buf_index] = 0;
      return check_reserved();
    }
    else if (isdigit(in_char))
    { // found number
      buffer_char((char)in_char);
      for (c=fin->peek();isdigit(c);c=fin->peek())
        buffer_char((char)fin->get());
      if (c=='.')
      { // is real
        buffer_char((char)fin -> get());
        // Only peek at what follows the '.', so an offending character is
        // reported but still available to be scanned as its own token.
        c=fin -> peek();
        if (isdigit(c))
        { // correct real syntax
          for (;isdigit(c);c=fin->peek())
            buffer_char((char)fin->get());
          token_buffer[buf_index] = 0;
          sscanf(token_buffer,"%lf",&real_val);
          return realliteral;
        }
        else
        { // illegal real syntax
          lexical_error((char)c);
          // Discard the malformed number so its text is not prepended to
          // the next token collected in token_buffer.
          buf_index = 0;
        }
      }
      else
      { // found integer
        token_buffer[buf_index] = 0;
        sscanf(token_buffer,"%d",&int_val);
        return intliteral;
      }
    }
    else if (in_char=='(') return lparen;
    else if (in_char==')') return rparen;
    else if (in_char==';') return semicolon;
    else if (in_char==':')
    {
      if (fin->peek() == '=')
      {
        fin -> get();
        return assignop;
      }
      else return colon;
    }
    else if (in_char==',') return comma;
    else if (in_char=='+') return plusop;
    else if (in_char=='-')
    {
      if (fin->peek()=='-')
      { // found comment: skip to end of line, or to end of input if the
        // last line has no newline
        do in_char=fin -> get();
        while (in_char != '\n' && in_char != EOF);
      }
      else return minusop;
    }
    else if (in_char=='*') return timesop;
    else if (in_char=='/') return divop;
    else if (in_char=='=') return eqop;
    else if (in_char=='<')
    {
      if (fin->peek()=='=')
      {
        fin -> get();
        return lesseqop;
      }
      else return lessop;
    }
    else if (in_char=='>')
    {
      if (fin->peek()=='=')
      {
        fin -> get();
        return greateqop;
      }
      else return greatop;
    }
    else if (in_char == '"')
    { // found a string
      str_index=0;
      while(1)
      {
        c=fin -> get();
        if (c==EOF)
        { // input ended before the closing quote
          lexical_error('"');
          break;
        }
        if (c=='"')
        {
          // A lone quote closes the literal; a doubled quote stands for
          // one literal quote character.
          if (fin->peek()!='"') break;
          fin -> get();
        }
        // Characters beyond str_size are consumed but not stored.
        if (str_index<str_size) str_buffer[str_index++]=(char)c;
      }
      str_buffer[str_index] = 0; // terminate string
      return strliteral;
    }
    else lexical_error((char)in_char);
  }
}
