parsejava.js 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285
  1. /**
  2. * Java parser for codemirror
  3. *
  4. * @author Patrick Wied
  5. */
  6. var JavaParser = Editor.Parser = (function() {
  // Token types that expression() accepts as complete, self-contained atoms.
  var atomicTypes = {
    "atom": true,
    "number": true,
    "string": true,
    "regexp": true
  };
  // JSON-indentation switch. In this file it is only ever written by
  // configure(); nothing visible here reads it.
  var json = false;
  // Lexical context record. Contexts are chained through `prev`.
  //   indented - indentation at the start of the line that opened the scope
  //   column   - column at which the scope was opened
  //   type     - kind of scope, e.g. 'stat' (statement), 'form' (special
  //              form) or a bracket character
  //   align    - whether lines inside a bracket scope line up with the
  //              opening character; only stored when a value is supplied
  //   prev     - parent scope, if any
  //   info     - optional extra tag for the scope
  function JavaLexical(indented, column, type, align, prev, info) {
    this.indented = indented;
    this.column = column;
    this.type = type;
    // When no alignment is given the property is left off entirely, so
    // that an `"align" in lexical` probe can tell "undecided" apart from
    // an explicit true/false.
    var alignGiven = align !== null && align !== undefined;
    if (alignGiven) {
      this.align = align;
    }
    this.prev = prev;
    this.info = info;
  }
  // Java indentation rules. Given a lexical context, returns a function
  // that maps the first characters of a line to the column that line
  // should be indented to. `indentUnit` is expected to be defined outside
  // this file.
  function indentJava(lexical) {
    return function(firstChars) {
      var scopeType = lexical.type;
      var lead = firstChars && firstChars.charAt(0);
      // True when the line starts with the character stored as the scope type.
      var closes = lead == scopeType;

      // A brace opening the body of a special form stays at the form's level.
      if (scopeType == "form" && lead == "{") {
        return lexical.indented;
      }
      // Continuation of a statement or special form: one extra unit.
      if (scopeType == "stat" || scopeType == "form") {
        return lexical.indented + indentUnit;
      }
      // Inside a switch body: labels get one unit, everything else two.
      if (lexical.info == "switch" && !closes) {
        var isLabel = /^(?:case|default)\b/.test(firstChars);
        return lexical.indented + (isLabel ? indentUnit : 2 * indentUnit);
      }
      // Aligned bracket scope: line up with the opening column.
      if (lexical.align) {
        return lexical.column - (closes ? 1 : 0);
      }
      // Plain bracket scope: one unit deeper, except for the closing line.
      return lexical.indented + (closes ? 0 : indentUnit);
    };
  }
  45. // The parser-iterator-producing function itself.
  46. function parseJava(input, basecolumn) {
  // Token stream built over the raw input.
  var tokens = tokenizeJava(input);
  // Parser state. cc is a stack of actions still to be performed to
  // finish the current statement (for instance, a closing parenthesis and
  // a semicolon may still be expected). The action at the end of the
  // stack runs first. It starts out holding `statements`, which re-queues
  // itself and therefore keeps consuming whole statements.
  var cc = [statements];
  // Outermost lexical scope; lexical scopes are used mostly for indentation.
  var lexical = new JavaLexical(basecolumn || 0, 0, "block", false);
  // Current column and the indentation at the start of the current line;
  // both feed into newly created lexical scope objects.
  var column = 0, indented = 0;
  // Channels through which the mark, cont and pass helpers report back to
  // the driver loop in `next`.
  var consume;
  var marked;
  // The iterator object handed back to the caller.
  var parser = {};
  parser.next = next;
  parser.copy = copy;
  // Produces the next token from the stream, updating column/indentation
  // bookkeeping and running queued actions until one of them consumes the
  // token.
  function next() {
    // Lexical actions only adjust the `lexical` variable; they must all
    // run before the token is looked at, otherwise the code below would
    // see a stale scope.
    for (; cc[cc.length - 1].lex; ) {
      cc.pop()();
    }

    var token = tokens.next();
    var width = token.value.length;

    // Whitespace at the very start of a line defines that line's indentation.
    if (column == 0 && token.type == "whitespace") {
      indented = width;
    }
    column += width;

    if (token.content == "\n") {
      indented = 0;
      column = 0;
      // A scope whose alignment is still undecided when the line ends is
      // an un-aligned scope.
      if (!("align" in lexical)) {
        lexical.align = false;
      }
      // Newline tokens carry the indentation function for the next line.
      token.indentation = indentJava(lexical);
    }

    // Tokens without syntactic meaning are returned untouched.
    var kind = token.type;
    var isFiller = kind == "whitespace" || kind == "comment" ||
                   kind == "javadoc" || kind == "annotation";
    if (isFiller) {
      return token;
    }

    // A meaningful token on the line that opened the scope makes it aligned.
    if (!("align" in lexical)) {
      lexical.align = true;
    }

    // Run actions off the top of the stack until one consumes the token.
    for (;;) {
      consume = false;
      marked = false;
      cc.pop()(token.type, token.content);
      if (!consume) {
        continue;
      }
      // A truthy `marked` overrides the style of the consumed token.
      if (marked) {
        token.style = marked;
      }
      return token;
    }
  }
  // Snapshots the parser state. The stateful variables are captured in a
  // closure, and the returned function restores them when called with a
  // new input stream. The cc array is copied both when the snapshot is
  // taken and on every restore, because it is constantly being modified;
  // the lexical object is shared by reference rather than cloned.
  function copy() {
    var savedLexical = lexical;
    var savedActions = cc.slice();
    var savedTokenState = tokens.state;
    return function copyParser(input) {
      lexical = savedLexical;
      // Hand out a fresh copy so the snapshot itself stays reusable.
      cc = savedActions.slice();
      indented = 0;
      column = 0;
      tokens = tokenizeJava(input, savedTokenState);
      return parser;
    };
  }
  // Pushes the given actions onto the cc stack back to front, so that the
  // first element of `fs` ends up on top of the stack.
  function push(fs) {
    var remaining = fs.length;
    while (remaining > 0) {
      remaining -= 1;
      cc.push(fs[remaining]);
    }
  }
  // cont and pass are how action functions queue further actions on the
  // stack. cont marks the current token as consumed; pass leaves it for
  // the next action to look at.
  function cont() {
    consume = true;
    push(arguments);
  }
  // Queues the given actions but leaves the current token unconsumed, so
  // the next action on the stack gets to see it.
  function pass() {
    consume = false;
    push(arguments);
  }
  // Records a style override for the token currently being processed;
  // the driver loop in `next` copies a truthy value onto token.style.
  function mark(style) {
    marked = style;
  }
  // Builds an action that opens a new lexical context of the given type
  // (and optional info tag) nested inside the current one. The context is
  // created when the action runs, using the indentation and column at
  // that moment.
  function pushlex(type, info) {
    function enterScope() {
      lexical = new JavaLexical(indented, column, type, null, lexical, info);
    }
    // Flagged as a lexical action so `next` runs it before fetching a token.
    enterScope.lex = true;
    return enterScope;
  }
  // Leaves the current lexical context and returns to its parent.
  function poplex() {
    var enclosing = lexical.prev;
    lexical = enclosing;
  }
  // The 'lex' flag on lexical actions tells the 'next' function that they
  // can (and have to) be run before moving on to the next token.
  poplex.lex = true;
  // Creates an action that discards tokens until it finds one of the
  // given type.
  //   wanted  - token type to wait for
  //   returns - an action that consumes every token it is given and
  //             re-queues itself until a token of type `wanted` is seen
  function expect(wanted){
    return function expecting(type){
      if (type == wanted) cont();
      // Re-queue through the function's own name: `arguments.callee`
      // throws a TypeError in strict-mode code and ES modules.
      else cont(expecting);
    };
  }
  // Queues one statement followed by itself again, without consuming the
  // current token, so statements keep being parsed one after another.
  function statements(type) {
    var queued = pass(statement, statements);
    return queued;
  }
  // Statement dispatcher: chooses the sequence of actions for a statement
  // from the type of its first token. Every recognised token type is
  // consumed; anything else is left in place and parsed as an expression
  // statement terminated by ';'.
  function statement(type) {
    switch (type) {
      case "keyword a":
        cont(pushlex("form"), expression, statement, poplex);
        break;
      case "keyword b":
        cont(pushlex("form"), statement, poplex);
        break;
      case "{":
        cont(pushlex("}"), block, poplex);
        break;
      case "for":
        cont(pushlex("form"), expect("("), pushlex(")"), forspec1, expect(")"), poplex, statement, poplex);
        break;
      case "variable":
        cont(pushlex("stat"), maybelabel);
        break;
      case "switch":
        // The body scope is tagged "switch" so indentation can treat
        // case/default labels specially.
        cont(pushlex("form"), expression, pushlex("}", "switch"), expect("{"), block, poplex, poplex);
        break;
      case "case":
        cont(expression, expect(":"));
        break;
      case "default":
        cont(expect(":"));
        break;
      case "catch":
        // The empty function is a no-op action: it neither consumes the
        // token nor queues anything.
        cont(pushlex("form"), expect("("), function(){}, expect(")"), statement, poplex);
        break;
      case "class":
      case "interface":
        cont();
        break;
      case "keyword c":
        cont(statement);
        break;
      default:
        pass(pushlex("stat"), expression, expect(";"), poplex);
    }
  }
  // Expression dispatcher. Atomic tokens, prefix keywords/operators and
  // opening '(' or '[' are consumed here; any other token type is left
  // alone and nothing is queued, so the next action on the stack sees it.
  function expression(type) {
    if (atomicTypes.hasOwnProperty(type)) {
      cont(maybeoperator);
      return;
    }
    switch (type) {
      case "keyword c":
      case "operator":
        cont(expression);
        break;
      case "(":
        cont(pushlex(")"), expression, expect(")"), poplex, maybeoperator);
        break;
      case "[":
        cont(pushlex("]"), commasep(expression, "]"), poplex, maybeoperator);
        break;
    }
  }
  // Used at positions where an operator, a call's argument list or a
  // subscript may follow. When the token is none of those, nothing is
  // consumed or queued and the next action on the stack handles it.
  function maybeoperator(type) {
    switch (type) {
      case "operator":
        cont(expression);
        break;
      case "(":
        cont(pushlex(")"), expression, commasep(expression, ")"), poplex, maybeoperator);
        break;
      case "[":
        cont(pushlex("]"), expression, expect("]"), poplex, maybeoperator);
        break;
    }
  }
  // Decides what follows a statement-leading variable token (the
  // `statement` dispatcher opens a 'stat' scope before queueing this).
  //   '('  - method definition: the parameter list is consumed up to ')'
  //          without parsing its contents, the 'stat' scope is closed and
  //          a statement is parsed next
  //   '{'  - property definition: the 'stat' scope is closed and a
  //          '}' block is opened
  //   else - ordinary statement: optional operator tail, then ';'
  function maybelabel(type) {
    switch (type) {
      case "(":
        cont(commasep(function(){}, ")"), poplex, statement);
        break;
      case "{":
        cont(poplex, pushlex("}"), block, poplex);
        break;
      default:
        pass(maybeoperator, expect(";"), poplex);
    }
  }
  // Builds an action that parses a comma-separated list of whatever the
  // `what` action recognises, terminated by a token of type `end`.
  function commasep(what, end) {
    // Runs after each list item: a comma starts another item, the end
    // token finishes the list, and anything else is consumed and then
    // skipped over until the end token shows up.
    function proceed(type) {
      if (type == ",") {
        cont(what, proceed);
        return;
      }
      if (type == end) {
        cont();
        return;
      }
      cont(expect(end));
    }
    return function commaSeparated(type) {
      // An immediate end token means the list is empty.
      if (type == end) {
        cont();
        return;
      }
      pass(what, proceed);
    };
  }
  // Parses statements one after another until a closing brace is found,
  // which it consumes.
  function block(type) {
    if (type == "}") {
      cont();
      return;
    }
    pass(statement, block);
  }
  // For loops. First step of the loop header: whatever the token is, it
  // is left unconsumed and handed on to forspec2.
  function forspec1(type) {
    pass(forspec2);
  }
  // Consumes the current token; an "in" value continues with an
  // expression, anything else with an operator tail followed by forspec2.
  // NOTE(review): nothing in this file queues this action - confirm
  // whether it is still needed.
  function formaybein(type, value) {
    if (value == "in") {
      cont(expression);
      return;
    }
    cont(maybeoperator, forspec2);
  }
  // Second step of the loop header. The current token is always consumed:
  // a ';' moves straight on to forspec3, an "in" value continues with an
  // expression, and anything else is followed by an expression, a ';' and
  // then forspec3.
  function forspec2(type, value) {
    if (type == ";") {
      cont(forspec3);
      return;
    }
    if (value == "in") {
      cont(expression);
      return;
    }
    cont(expression, expect(";"), forspec3);
  }
  // Last step of the loop header: a ')' is left for the enclosing action
  // to handle; any other token is consumed and an expression follows.
  function forspec3(type) {
    if (type == ")") {
      pass();
      return;
    }
    cont(expression);
  }
  254. return parser;
  255. }
  256. return {
  257. make: parseJava,
  258. electricChars: "{}:",
  259. configure: function(obj) {
  260. if (obj.json != null) json = obj.json;
  261. }
  262. };
  263. })();