// scanner.cpp -- scanner class implementation #include "scanner.h" #include "chset.h" #include "Token.h" #include "FileManager.h" #include "TokFiler.h" #include "rwtable.h" #include "idsymtab.h" #include #include #include scanner::scanner() { // scanner class constructor chset t1("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"); chset t2("0123456789"); chset t3("+-*/=<>:(),;.^[]{}#'\""); // 20 delimiters chset t4("`~!@$%&_|'\\?"); chset t5("."); letters = t1; digits = t2; delimiters = t3; illegalChars = t4; decimal = t5; aRwtablePtr = new rwtable; anIdSymTabPtr = new idSymbolTable; lineNumber = 1; // line number starts with 1. word[79] = '\0'; // a word length will be 80, end with null char. strcatHelper[1] = '\0'; // helper for string concatenation. } // end of scanner constructor. void scanner::error(unsigned int n, unsigned int line) { cout << "\n\nError on line " << line << ", #" << n << ": "; switch (n) { case 1: cout << "Numeric constant contains a letter!"; break; case 2: cout << "Illegal symbol find in program!"; break; case 3: cout << "Numeric value too large!"; break; case 4: cout << "Sum of all identifier characters exceeds limit!"; break; case 5: cout << "Lang2 compiler not capable handling floats!"; break; case 6: cout << "Nested comments not allowed!"; break; case 7: cout << "Comments should end with right }"; break; } // end of switch. cout << "\n\n"; aFileManager.closeProgram(); aFileManager.seeErrorLines(line); aTokFiler.removeTokenFile(); exit(1); } // end of method error(). void scanner::scanIdentifier() { const NUMOFRWORDS = 35; while ( (ch != ' ') && (ch != '\\') && (ch != '\t') && (ch != '\n') && !delimiters.in(ch) && !illegalChars.in(ch) && !aFileManager.ENDOFPROGRAM ) { strcatHelper[0] = ch; strcat(word, strcatHelper); ch = aFileManager.getNextChar(); if (illegalChars.in(ch)) { strcatHelper[0] = ch; strcat(word, strcatHelper); error(2, lineNumber); } // end of if } // end of while loop. 
int rwHashValue = -1; if ( (aRwtablePtr != NULL) && (strlen(word) != 0) ) { rwHashValue = aRwtablePtr->rwhash(word); } if ( (rwHashValue >= 0) && (rwHashValue <= NUMOFRWORDS) ) { if (strcmp(word, aRwtablePtr->resWord(rwHashValue)) == 0) { tok.setClass(RESERVED); tok.setValue(rwHashValue); aTokFiler.emitToken(tok); } else { tok.setClass(IDENTIFIER); } } else { tok.setClass(IDENTIFIER); } // end of outer if-else. if (tok.getClass() == IDENTIFIER) { int ididx; bool insertOK; anIdSymTabPtr->searchAndMaybeInsert(word, ididx, insertOK); if (!insertOK) { error(4, lineNumber); } // end of inner if. tok.setValue(ididx); aTokFiler.emitToken(tok); } // end of outer if. } // end of method scanIdentifier(). void scanner::strToInt(unsigned int& n, bool& ok) { const maxVal = 32767; n = 0; ok = true; int i = 0, len = strlen(word); int digit; while (ok && (i < len)) { digit = word[i] - '0'; if (n < ( (maxVal - digit) / 10) ) { n = 10* n + digit; i++; } else { ok = false; } } // end of while } // end of method strToInt(). void scanner::scanNumeric() { while ( (ch != ' ') && (ch != '\\') && (ch != '\t') && (ch != '\n') && !(delimiters.in(ch)) && !(illegalChars.in(ch)) && !aFileManager.ENDOFPROGRAM ) { strcatHelper[0] = ch; strcat(word, strcatHelper); ch = aFileManager.getNextChar(); if (letters.in(ch)) { strcatHelper[0] = ch; strcat(word, strcatHelper); error(1, lineNumber); } else if (illegalChars.in(ch) ) { strcatHelper[0] = ch; strcat(word, strcatHelper); error(2, lineNumber); } else if (decimal.in(ch) ) { strcatHelper[0] = ch; strcat(word, strcatHelper); error(5, lineNumber); } // end of if-else-if. } // end of while. unsigned int value; bool ok; strToInt(value, ok); // w need not be passed since data member. if (!ok) error(3, lineNumber); tok.setClass(NUMBER); tok.setValue(value); aTokFiler.emitToken(tok); } // end of method scanNumerical(). 
void scanner::scanSpecial() { unsigned int special; strcatHelper[0] = ch; strcat(word, strcatHelper); switch (ch) { case ';': case '.': case ',': case '(': case ')': case '+': case '-': case '*': case '/': case '=': case '[': case ']': case '{': case '}': case '#': case '\'': case '"': case '^': switch (ch) { case ';': special = SEMICOLON; break; case '.': special = PERIOD; break; case ',': special = COMMA; break; case '(': special = LPAREN; break; case ')': special = RPAREN; break; case '+': special = PLUS; break; case '-': special = MINUS; break; case '*': special = MULTIPLY; break; case '/': special = DIVIDE; break; case '=': special = EQUAL; break; case '[': special = LSQUARE; break; case ']': special = RSQUARE; break; case '{': special = LCURLY; break; case '}': special = RCURLY; break; case '"': special = DQUOTE; break; case '\'': special = SQUOTE; break; case '#': special = NOTEQUAL; break; case '^': special = CARAT; break; } // end of inner switch. ch = aFileManager.getNextChar(); break; case ':': special = COLON; ch = aFileManager.getNextChar(); if (ch == '=') { strcatHelper[0] = ch; strcat(word, strcatHelper); special = COLONEQUAL; ch = aFileManager.getNextChar(); } break; case '>': special = GREATER; ch = aFileManager.getNextChar(); if (ch == '=') { strcatHelper[0] = ch; strcat(word, strcatHelper); special = GREATEREQUAL; ch = aFileManager.getNextChar(); } break; case '<': special = LESS; ch = aFileManager.getNextChar(); if (ch == '=') { strcatHelper[0] = ch; strcat(word, strcatHelper); special = LESSEQUAL; ch = aFileManager.getNextChar(); } break; } // end of switch. if (special == LCURLY) scanComments(special); tok.setClass(SYMBOL); tok.setValue(special); aTokFiler.emitToken(tok); } // end of method scanSpecial(). 
void scanner::scanComments(unsigned int special) { int lCurlyCnt = 0, rCurlyCnt = 0; lCurlyCnt++; if (ch == '\n') { // deal with NEWLINE lineNumber++; tok.setClass(NEWLINE); tok.setValue(lineNumber); aTokFiler.emitToken(tok); } // discard all comment blocks. while ((ch != '}') && !aFileManager.ENDOFPROGRAM) { ch = aFileManager.getNextChar(); if (ch == '\n') { lineNumber++; tok.setClass(NEWLINE); tok.setValue(lineNumber); aTokFiler.emitToken(tok); } if (ch == '{') { lCurlyCnt++; error(6, lineNumber); } } // end of while loop. if (ch == '}') { rCurlyCnt++; } // cout <<"In scanComments, lCurlyCnt == "< rCurlyCnt++) { error(7, lineNumber); } } char* scanner::getNextWord() { word[0] = '\0'; while ( (ch == ' ') || (ch == '\t') ) { // throw away white spaces. ch = aFileManager.getNextChar(); } if ( (ch=='\\') && (aFileManager.peekNextChar()=='\\') ) { //comments: from "\\" to end of current line while ( (ch != '\n') && !(aFileManager.ENDOFPROGRAM) ) { ch = aFileManager.getNextChar(); } // end of while. } if (letters.in(ch)) { scanIdentifier(); } else if (digits.in(ch)) { scanNumeric(); } else if (delimiters.in(ch)) { scanSpecial(); } else if (ch == '\n') { lineNumber++; tok.setClass(NEWLINE); tok.setValue(lineNumber); aTokFiler.emitToken(tok); ch = aFileManager.getNextChar(); } else if (aFileManager.ENDOFPROGRAM) { ; //do nothing -- eof reached } else { strcatHelper[0] = ch; strcat(word, strcatHelper); error(2, lineNumber); cout << "In getNextWord() 1, word == " << word << endl; } return word; } // end of method getNextWord(). 
// Drives the scan of a whole program: creates the token file, emits an
// initial NEWLINE token carrying the current line number, then calls
// getNextWord() repeatedly until end of program, and finally emits ENDOFTEXT
// and closes the token file.
void scanner::program() {
    // NOTE(review): scanning is forcibly stopped once more than this many
    // words have been read; this looks like a debugging safeguard -- confirm
    // whether it is intended to stay.
    const int WORDLIMIT = 100;

    aTokFiler.createTokenFile();

    // The token stream opens with a NEWLINE token for the first line.
    tok.setClass(NEWLINE);
    tok.setValue(lineNumber);
    aTokFiler.emitToken(tok);

    // Prime the scanner with the first character.
    // NOTE(review): the end-of-program flag is cleared after this first
    // read, as in the original -- verify against FileManager how that
    // behaves for an empty input file.
    ch = aFileManager.getNextChar();
    aFileManager.ENDOFPROGRAM = false;

    for (int wordCount = 1; !aFileManager.ENDOFPROGRAM; ++wordCount) {
        getNextWord();
        if (wordCount > WORDLIMIT) {
            aFileManager.ENDOFPROGRAM = true;
        }
    }

    tok.setClass(ENDOFTEXT);
    tok.setValue(0);
    aTokFiler.emitToken(tok);
    aTokFiler.closeTokenFile();
}   // end of method program().