Ruby 3.4.1p0 (2024-12-25 revision 48d4efcb85000e1ebae42004e963b5d0cedddcf2)
prism.c
1#include "prism.h"
2
6const char *
7pm_version(void) {
8 return PRISM_VERSION;
9}
10
// NOTE(review): judging by the name, this is presumably the number of columns
// a tab character counts for; its uses are not visible in this part of the
// file, so confirm before relying on that.
#define PM_TAB_WHITESPACE_SIZE 8

// Smaller and larger of two values. Each argument can be evaluated more than
// once, so do not pass expressions with side effects.
#define MIN(a, b) (((b) > (a)) ? (a) : (b))
#define MAX(a, b) (((b) < (a)) ? (a) : (b))
20
/******************************************************************************/
/* Lex mode manipulations */
/******************************************************************************/
24
/**
 * Return the given byte if it is one of the opening brackets `(`, `[`, `{` or
 * `<`. Every other byte yields '\0'.
 */
static inline uint8_t
lex_mode_incrementor(const uint8_t start) {
    if (start == '(' || start == '[' || start == '{' || start == '<') {
        return start;
    }

    return '\0';
}
41
/**
 * Return the byte that closes a literal opened with the given byte: the
 * matching closing bracket for `(`, `[`, `{` and `<`, and the byte itself for
 * anything else.
 */
static inline uint8_t
lex_mode_terminator(const uint8_t start) {
    if (start == '(') return ')';
    if (start == '[') return ']';
    if (start == '{') return '}';
    if (start == '<') return '>';

    return start;
}
61
67static bool
68lex_mode_push(pm_parser_t *parser, pm_lex_mode_t lex_mode) {
69 lex_mode.prev = parser->lex_modes.current;
70 parser->lex_modes.index++;
71
72 if (parser->lex_modes.index > PM_LEX_STACK_SIZE - 1) {
74 if (parser->lex_modes.current == NULL) return false;
75
76 *parser->lex_modes.current = lex_mode;
77 } else {
78 parser->lex_modes.stack[parser->lex_modes.index] = lex_mode;
79 parser->lex_modes.current = &parser->lex_modes.stack[parser->lex_modes.index];
80 }
81
82 return true;
83}
84
88static inline bool
89lex_mode_push_list(pm_parser_t *parser, bool interpolation, uint8_t delimiter) {
90 uint8_t incrementor = lex_mode_incrementor(delimiter);
91 uint8_t terminator = lex_mode_terminator(delimiter);
92
93 pm_lex_mode_t lex_mode = {
94 .mode = PM_LEX_LIST,
95 .as.list = {
96 .nesting = 0,
97 .interpolation = interpolation,
98 .incrementor = incrementor,
99 .terminator = terminator
100 }
101 };
102
103 // These are the places where we need to split up the content of the list.
104 // We'll use strpbrk to find the first of these characters.
105 uint8_t *breakpoints = lex_mode.as.list.breakpoints;
106 memcpy(breakpoints, "\\ \t\f\r\v\n\0\0\0", sizeof(lex_mode.as.list.breakpoints));
107 size_t index = 7;
108
109 // Now we'll add the terminator to the list of breakpoints. If the
110 // terminator is not already a NULL byte, add it to the list.
111 if (terminator != '\0') {
112 breakpoints[index++] = terminator;
113 }
114
115 // If interpolation is allowed, then we're going to check for the #
116 // character. Otherwise we'll only look for escapes and the terminator.
117 if (interpolation) {
118 breakpoints[index++] = '#';
119 }
120
121 // If there is an incrementor, then we'll check for that as well.
122 if (incrementor != '\0') {
123 breakpoints[index++] = incrementor;
124 }
125
126 parser->explicit_encoding = NULL;
127 return lex_mode_push(parser, lex_mode);
128}
129
135static inline bool
136lex_mode_push_list_eof(pm_parser_t *parser) {
137 return lex_mode_push_list(parser, false, '\0');
138}
139
143static inline bool
144lex_mode_push_regexp(pm_parser_t *parser, uint8_t incrementor, uint8_t terminator) {
145 pm_lex_mode_t lex_mode = {
146 .mode = PM_LEX_REGEXP,
147 .as.regexp = {
148 .nesting = 0,
149 .incrementor = incrementor,
150 .terminator = terminator
151 }
152 };
153
154 // These are the places where we need to split up the content of the
155 // regular expression. We'll use strpbrk to find the first of these
156 // characters.
157 uint8_t *breakpoints = lex_mode.as.regexp.breakpoints;
158 memcpy(breakpoints, "\r\n\\#\0\0", sizeof(lex_mode.as.regexp.breakpoints));
159 size_t index = 4;
160
161 // First we'll add the terminator.
162 if (terminator != '\0') {
163 breakpoints[index++] = terminator;
164 }
165
166 // Next, if there is an incrementor, then we'll check for that as well.
167 if (incrementor != '\0') {
168 breakpoints[index++] = incrementor;
169 }
170
171 parser->explicit_encoding = NULL;
172 return lex_mode_push(parser, lex_mode);
173}
174
178static inline bool
179lex_mode_push_string(pm_parser_t *parser, bool interpolation, bool label_allowed, uint8_t incrementor, uint8_t terminator) {
180 pm_lex_mode_t lex_mode = {
181 .mode = PM_LEX_STRING,
182 .as.string = {
183 .nesting = 0,
184 .interpolation = interpolation,
185 .label_allowed = label_allowed,
186 .incrementor = incrementor,
187 .terminator = terminator
188 }
189 };
190
191 // These are the places where we need to split up the content of the
192 // string. We'll use strpbrk to find the first of these characters.
193 uint8_t *breakpoints = lex_mode.as.string.breakpoints;
194 memcpy(breakpoints, "\r\n\\\0\0\0", sizeof(lex_mode.as.string.breakpoints));
195 size_t index = 3;
196
197 // Now add in the terminator. If the terminator is not already a NULL byte,
198 // then we'll add it.
199 if (terminator != '\0') {
200 breakpoints[index++] = terminator;
201 }
202
203 // If interpolation is allowed, then we're going to check for the #
204 // character. Otherwise we'll only look for escapes and the terminator.
205 if (interpolation) {
206 breakpoints[index++] = '#';
207 }
208
209 // If we have an incrementor, then we'll add that in as a breakpoint as
210 // well.
211 if (incrementor != '\0') {
212 breakpoints[index++] = incrementor;
213 }
214
215 parser->explicit_encoding = NULL;
216 return lex_mode_push(parser, lex_mode);
217}
218
224static inline bool
225lex_mode_push_string_eof(pm_parser_t *parser) {
226 return lex_mode_push_string(parser, false, false, '\0', '\0');
227}
228
234static void
235lex_mode_pop(pm_parser_t *parser) {
236 if (parser->lex_modes.index == 0) {
237 parser->lex_modes.current->mode = PM_LEX_DEFAULT;
238 } else if (parser->lex_modes.index < PM_LEX_STACK_SIZE) {
239 parser->lex_modes.index--;
240 parser->lex_modes.current = &parser->lex_modes.stack[parser->lex_modes.index];
241 } else {
242 parser->lex_modes.index--;
243 pm_lex_mode_t *prev = parser->lex_modes.current->prev;
244 xfree(parser->lex_modes.current);
245 parser->lex_modes.current = prev;
246 }
247}
248
252static inline bool
253lex_state_p(const pm_parser_t *parser, pm_lex_state_t state) {
254 return parser->lex_state & state;
255}
256
/**
 * The classification that lex_state_ignored_p returns for the current lex
 * state.
 */
typedef enum {
    PM_IGNORED_NEWLINE_NONE = 0,
    PM_IGNORED_NEWLINE_ALL = 1,
    PM_IGNORED_NEWLINE_PATTERN = 2
} pm_ignored_newline_type_t;
262
263static inline pm_ignored_newline_type_t
264lex_state_ignored_p(pm_parser_t *parser) {
265 bool ignored = lex_state_p(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_CLASS | PM_LEX_STATE_FNAME | PM_LEX_STATE_DOT) && !lex_state_p(parser, PM_LEX_STATE_LABELED);
266
267 if (ignored) {
268 return PM_IGNORED_NEWLINE_ALL;
269 } else if ((parser->lex_state & ~((unsigned int) PM_LEX_STATE_LABEL)) == (PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED)) {
270 return PM_IGNORED_NEWLINE_PATTERN;
271 } else {
272 return PM_IGNORED_NEWLINE_NONE;
273 }
274}
275
276static inline bool
277lex_state_beg_p(pm_parser_t *parser) {
278 return lex_state_p(parser, PM_LEX_STATE_BEG_ANY) || ((parser->lex_state & (PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED)) == (PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED));
279}
280
281static inline bool
282lex_state_arg_p(pm_parser_t *parser) {
283 return lex_state_p(parser, PM_LEX_STATE_ARG_ANY);
284}
285
286static inline bool
287lex_state_spcarg_p(pm_parser_t *parser, bool space_seen) {
288 if (parser->current.end >= parser->end) {
289 return false;
290 }
291 return lex_state_arg_p(parser) && space_seen && !pm_char_is_whitespace(*parser->current.end);
292}
293
294static inline bool
295lex_state_end_p(pm_parser_t *parser) {
296 return lex_state_p(parser, PM_LEX_STATE_END_ANY);
297}
298
302static inline bool
303lex_state_operator_p(pm_parser_t *parser) {
304 return lex_state_p(parser, PM_LEX_STATE_FNAME | PM_LEX_STATE_DOT);
305}
306
311static inline void
312lex_state_set(pm_parser_t *parser, pm_lex_state_t state) {
313 parser->lex_state = state;
314}
315
// Debug logging of lex state changes is off unless the build defines
// PM_DEBUG_LOGGING to a non-zero value.
#if !defined(PM_DEBUG_LOGGING)
#define PM_DEBUG_LOGGING 0
#endif
323
324#if PM_DEBUG_LOGGING
325PRISM_ATTRIBUTE_UNUSED static void
326debug_state(pm_parser_t *parser) {
327 fprintf(stderr, "STATE: ");
328 bool first = true;
329
330 if (parser->lex_state == PM_LEX_STATE_NONE) {
331 fprintf(stderr, "NONE\n");
332 return;
333 }
334
335#define CHECK_STATE(state) \
336 if (parser->lex_state & state) { \
337 if (!first) fprintf(stderr, "|"); \
338 fprintf(stderr, "%s", #state); \
339 first = false; \
340 }
341
342 CHECK_STATE(PM_LEX_STATE_BEG)
343 CHECK_STATE(PM_LEX_STATE_END)
344 CHECK_STATE(PM_LEX_STATE_ENDARG)
345 CHECK_STATE(PM_LEX_STATE_ENDFN)
346 CHECK_STATE(PM_LEX_STATE_ARG)
347 CHECK_STATE(PM_LEX_STATE_CMDARG)
348 CHECK_STATE(PM_LEX_STATE_MID)
349 CHECK_STATE(PM_LEX_STATE_FNAME)
350 CHECK_STATE(PM_LEX_STATE_DOT)
351 CHECK_STATE(PM_LEX_STATE_CLASS)
352 CHECK_STATE(PM_LEX_STATE_LABEL)
353 CHECK_STATE(PM_LEX_STATE_LABELED)
354 CHECK_STATE(PM_LEX_STATE_FITEM)
355
356#undef CHECK_STATE
357
358 fprintf(stderr, "\n");
359}
360
361static void
362debug_lex_state_set(pm_parser_t *parser, pm_lex_state_t state, char const * caller_name, int line_number) {
363 fprintf(stderr, "Caller: %s:%d\nPrevious: ", caller_name, line_number);
364 debug_state(parser);
365 lex_state_set(parser, state);
366 fprintf(stderr, "Now: ");
367 debug_state(parser);
368 fprintf(stderr, "\n");
369}
370
// From here on, route lex_state_set calls through debug_lex_state_set so that
// the calling function and line number are logged with each change.
#define lex_state_set(parser, state) debug_lex_state_set(parser, state, __func__, __LINE__)
372#endif
373
/******************************************************************************/
/* Command-line macro helpers */
/******************************************************************************/
377
// Yields a non-zero value if any bit of the given option mask is set in the
// parser's command_line field.
#define PM_PARSER_COMMAND_LINE_OPTION(parser, option) ((parser)->command_line & (option))

// Tests the PM_OPTIONS_COMMAND_LINE_A bit.
#define PM_PARSER_COMMAND_LINE_OPTION_A(parser) PM_PARSER_COMMAND_LINE_OPTION(parser, PM_OPTIONS_COMMAND_LINE_A)

// Tests the PM_OPTIONS_COMMAND_LINE_E bit.
#define PM_PARSER_COMMAND_LINE_OPTION_E(parser) PM_PARSER_COMMAND_LINE_OPTION(parser, PM_OPTIONS_COMMAND_LINE_E)

// Tests the PM_OPTIONS_COMMAND_LINE_L bit.
#define PM_PARSER_COMMAND_LINE_OPTION_L(parser) PM_PARSER_COMMAND_LINE_OPTION(parser, PM_OPTIONS_COMMAND_LINE_L)

// Tests the PM_OPTIONS_COMMAND_LINE_N bit.
#define PM_PARSER_COMMAND_LINE_OPTION_N(parser) PM_PARSER_COMMAND_LINE_OPTION(parser, PM_OPTIONS_COMMAND_LINE_N)

// Tests the PM_OPTIONS_COMMAND_LINE_P bit.
#define PM_PARSER_COMMAND_LINE_OPTION_P(parser) PM_PARSER_COMMAND_LINE_OPTION(parser, PM_OPTIONS_COMMAND_LINE_P)

// Tests the PM_OPTIONS_COMMAND_LINE_X bit.
#define PM_PARSER_COMMAND_LINE_OPTION_X(parser) PM_PARSER_COMMAND_LINE_OPTION(parser, PM_OPTIONS_COMMAND_LINE_X)
398
/******************************************************************************/
/* Diagnostic-related functions */
/******************************************************************************/
402
406static inline void
407pm_parser_err(pm_parser_t *parser, const uint8_t *start, const uint8_t *end, pm_diagnostic_id_t diag_id) {
408 pm_diagnostic_list_append(&parser->error_list, start, end, diag_id);
409}
410
/**
 * Append a diagnostic with the given id and format arguments, covering the
 * byte range from start to end, to the parser's error list. The parser
 * argument is parenthesized so that any pointer expression can be passed.
 */
#define PM_PARSER_ERR_FORMAT(parser, start, end, diag_id, ...) \
    pm_diagnostic_list_append_format(&(parser)->error_list, start, end, diag_id, __VA_ARGS__)
416
421static inline void
422pm_parser_err_current(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
423 pm_parser_err(parser, parser->current.start, parser->current.end, diag_id);
424}
425
/**
 * Append a formatted error that covers the range described by the given
 * location pointer (its start and end fields).
 */
#define PM_PARSER_ERR_LOCATION_FORMAT(parser, location, diag_id, ...) \
    PM_PARSER_ERR_FORMAT( \
        parser, \
        (location)->start, \
        (location)->end, \
        diag_id, \
        __VA_ARGS__ \
    )
432
437static inline void
438pm_parser_err_node(pm_parser_t *parser, const pm_node_t *node, pm_diagnostic_id_t diag_id) {
439 pm_parser_err(parser, node->location.start, node->location.end, diag_id);
440}
441
/**
 * Append a formatted error that covers the location of the given node.
 */
#define PM_PARSER_ERR_NODE_FORMAT(parser, node, diag_id, ...) \
    PM_PARSER_ERR_FORMAT( \
        parser, \
        (node)->location.start, \
        (node)->location.end, \
        diag_id, \
        __VA_ARGS__ \
    )
448
/**
 * Append a formatted error that covers the location of the given node and
 * passes the node's source text as the format arguments: first its length in
 * bytes as an int, then a pointer to its first byte.
 */
#define PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, node, diag_id) \
    PM_PARSER_ERR_NODE_FORMAT( \
        parser, \
        node, \
        diag_id, \
        (int) ((node)->location.end - (node)->location.start), \
        (const char *) (node)->location.start \
    )
455
460static inline void
461pm_parser_err_previous(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
462 pm_parser_err(parser, parser->previous.start, parser->previous.end, diag_id);
463}
464
469static inline void
470pm_parser_err_token(pm_parser_t *parser, const pm_token_t *token, pm_diagnostic_id_t diag_id) {
471 pm_parser_err(parser, token->start, token->end, diag_id);
472}
473
/**
 * Append a formatted error that covers the given token. The token is passed
 * by value (its fields are accessed with `.`), not as a pointer.
 */
#define PM_PARSER_ERR_TOKEN_FORMAT(parser, token, diag_id, ...) \
    PM_PARSER_ERR_FORMAT( \
        parser, \
        (token).start, \
        (token).end, \
        diag_id, \
        __VA_ARGS__ \
    )
480
/**
 * Append a formatted error that covers the given token (passed by value) and
 * passes the token's source text as the format arguments: first its length in
 * bytes as an int, then a pointer to its first byte.
 */
#define PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, token, diag_id) \
    PM_PARSER_ERR_TOKEN_FORMAT( \
        parser, \
        token, \
        diag_id, \
        (int) ((token).end - (token).start), \
        (const char *) (token).start \
    )
487
491static inline void
492pm_parser_warn(pm_parser_t *parser, const uint8_t *start, const uint8_t *end, pm_diagnostic_id_t diag_id) {
493 pm_diagnostic_list_append(&parser->warning_list, start, end, diag_id);
494}
495
500static inline void
501pm_parser_warn_token(pm_parser_t *parser, const pm_token_t *token, pm_diagnostic_id_t diag_id) {
502 pm_parser_warn(parser, token->start, token->end, diag_id);
503}
504
509static inline void
510pm_parser_warn_node(pm_parser_t *parser, const pm_node_t *node, pm_diagnostic_id_t diag_id) {
511 pm_parser_warn(parser, node->location.start, node->location.end, diag_id);
512}
513
/**
 * Append a diagnostic with the given id and format arguments, covering the
 * byte range from start to end, to the parser's warning list. The parser
 * argument is parenthesized so that any pointer expression can be passed.
 */
#define PM_PARSER_WARN_FORMAT(parser, start, end, diag_id, ...) \
    pm_diagnostic_list_append_format(&(parser)->warning_list, start, end, diag_id, __VA_ARGS__)
519
/**
 * Append a formatted warning that covers the given token. The token is passed
 * by value (its fields are accessed with `.`), not as a pointer.
 */
#define PM_PARSER_WARN_TOKEN_FORMAT(parser, token, diag_id, ...) \
    PM_PARSER_WARN_FORMAT( \
        parser, \
        (token).start, \
        (token).end, \
        diag_id, \
        __VA_ARGS__ \
    )
526
/**
 * Append a formatted warning that covers the given token (passed by value) and
 * passes the token's source text as the format arguments: first its length in
 * bytes as an int, then a pointer to its first byte.
 */
#define PM_PARSER_WARN_TOKEN_FORMAT_CONTENT(parser, token, diag_id) \
    PM_PARSER_WARN_TOKEN_FORMAT( \
        parser, \
        token, \
        diag_id, \
        (int) ((token).end - (token).start), \
        (const char *) (token).start \
    )
533
/**
 * Append a formatted warning that covers the location of the given node.
 */
#define PM_PARSER_WARN_NODE_FORMAT(parser, node, diag_id, ...) \
    PM_PARSER_WARN_FORMAT( \
        parser, \
        (node)->location.start, \
        (node)->location.end, \
        diag_id, \
        __VA_ARGS__ \
    )
540
546static void
547pm_parser_err_heredoc_term(pm_parser_t *parser, const uint8_t *ident_start, size_t ident_length) {
548 PM_PARSER_ERR_FORMAT(
549 parser,
550 ident_start,
551 ident_start + ident_length,
552 PM_ERR_HEREDOC_TERM,
553 (int) ident_length,
554 (const char *) ident_start
555 );
556}
557
/******************************************************************************/
/* Scope-related functions */
/******************************************************************************/
561
565static bool
566pm_parser_scope_push(pm_parser_t *parser, bool closed) {
567 pm_scope_t *scope = (pm_scope_t *) xmalloc(sizeof(pm_scope_t));
568 if (scope == NULL) return false;
569
570 *scope = (pm_scope_t) {
571 .previous = parser->current_scope,
572 .locals = { 0 },
573 .parameters = PM_SCOPE_PARAMETERS_NONE,
574 .implicit_parameters = { 0 },
575 .shareable_constant = parser->current_scope == NULL ? PM_SCOPE_SHAREABLE_CONSTANT_NONE : parser->current_scope->shareable_constant,
576 .closed = closed
577 };
578
579 parser->current_scope = scope;
580 return true;
581}
582
587static bool
588pm_parser_scope_toplevel_p(pm_parser_t *parser) {
589 pm_scope_t *scope = parser->current_scope;
590
591 do {
592 if (scope->previous == NULL) return true;
593 if (scope->closed) return false;
594 } while ((scope = scope->previous) != NULL);
595
596 assert(false && "unreachable");
597 return true;
598}
599
603static pm_scope_t *
604pm_parser_scope_find(pm_parser_t *parser, uint32_t depth) {
605 pm_scope_t *scope = parser->current_scope;
606
607 while (depth-- > 0) {
608 assert(scope != NULL);
609 scope = scope->previous;
610 }
611
612 return scope;
613}
614
/**
 * The outcome of pm_parser_scope_forwarding_param_check.
 */
typedef enum {
    PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS = 0,
    PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT = 1,
    PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL = 2
} pm_scope_forwarding_param_check_result_t;
620
621static pm_scope_forwarding_param_check_result_t
622pm_parser_scope_forwarding_param_check(pm_parser_t *parser, const uint8_t mask) {
623 pm_scope_t *scope = parser->current_scope;
624 bool conflict = false;
625
626 while (scope != NULL) {
627 if (scope->parameters & mask) {
628 if (scope->closed) {
629 if (conflict) {
630 return PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT;
631 } else {
632 return PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS;
633 }
634 }
635
636 conflict = true;
637 }
638
639 if (scope->closed) break;
640 scope = scope->previous;
641 }
642
643 return PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL;
644}
645
646static void
647pm_parser_scope_forwarding_block_check(pm_parser_t *parser, const pm_token_t * token) {
648 switch (pm_parser_scope_forwarding_param_check(parser, PM_SCOPE_PARAMETERS_FORWARDING_BLOCK)) {
649 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS:
650 // Pass.
651 break;
652 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT:
653 pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_CONFLICT_AMPERSAND);
654 break;
655 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL:
656 pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_NO_FORWARDING_AMPERSAND);
657 break;
658 }
659}
660
661static void
662pm_parser_scope_forwarding_positionals_check(pm_parser_t *parser, const pm_token_t * token) {
663 switch (pm_parser_scope_forwarding_param_check(parser, PM_SCOPE_PARAMETERS_FORWARDING_POSITIONALS)) {
664 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS:
665 // Pass.
666 break;
667 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT:
668 pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_CONFLICT_STAR);
669 break;
670 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL:
671 pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_NO_FORWARDING_STAR);
672 break;
673 }
674}
675
676static void
677pm_parser_scope_forwarding_all_check(pm_parser_t *parser, const pm_token_t *token) {
678 switch (pm_parser_scope_forwarding_param_check(parser, PM_SCOPE_PARAMETERS_FORWARDING_ALL)) {
679 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS:
680 // Pass.
681 break;
682 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT:
683 // This shouldn't happen, because ... is not allowed in the
684 // declaration of blocks. If we get here, we assume we already have
685 // an error for this.
686 break;
687 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL:
688 pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES);
689 break;
690 }
691}
692
693static void
694pm_parser_scope_forwarding_keywords_check(pm_parser_t *parser, const pm_token_t * token) {
695 switch (pm_parser_scope_forwarding_param_check(parser, PM_SCOPE_PARAMETERS_FORWARDING_KEYWORDS)) {
696 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS:
697 // Pass.
698 break;
699 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT:
700 pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_CONFLICT_STAR_STAR);
701 break;
702 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL:
703 pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_NO_FORWARDING_STAR_STAR);
704 break;
705 }
706}
707
712pm_parser_scope_shareable_constant_get(pm_parser_t *parser) {
713 return parser->current_scope->shareable_constant;
714}
715
720static void
721pm_parser_scope_shareable_constant_set(pm_parser_t *parser, pm_shareable_constant_value_t shareable_constant) {
722 pm_scope_t *scope = parser->current_scope;
723
724 do {
725 scope->shareable_constant = shareable_constant;
726 } while (!scope->closed && (scope = scope->previous) != NULL);
727}
728
/******************************************************************************/
/* Local variable-related functions */
/******************************************************************************/
732
// Locals tables whose capacity is below this value are kept as a plain list
// and scanned linearly; larger tables are addressed by hash.
#define PM_LOCALS_HASH_THRESHOLD 9
737
738static void
739pm_locals_free(pm_locals_t *locals) {
740 if (locals->capacity > 0) {
741 xfree(locals->locals);
742 }
743}
744
749static uint32_t
750pm_locals_hash(pm_constant_id_t name) {
751 name = ((name >> 16) ^ name) * 0x45d9f3b;
752 name = ((name >> 16) ^ name) * 0x45d9f3b;
753 name = (name >> 16) ^ name;
754 return name;
755}
756
761static void
762pm_locals_resize(pm_locals_t *locals) {
763 uint32_t next_capacity = locals->capacity == 0 ? 4 : (locals->capacity * 2);
764 assert(next_capacity > locals->capacity);
765
766 pm_local_t *next_locals = xcalloc(next_capacity, sizeof(pm_local_t));
767 if (next_locals == NULL) abort();
768
769 if (next_capacity < PM_LOCALS_HASH_THRESHOLD) {
770 if (locals->size > 0) {
771 memcpy(next_locals, locals->locals, locals->size * sizeof(pm_local_t));
772 }
773 } else {
774 // If we just switched from a list to a hash, then we need to fill in
775 // the hash values of all of the locals.
776 bool hash_needed = (locals->capacity <= PM_LOCALS_HASH_THRESHOLD);
777 uint32_t mask = next_capacity - 1;
778
779 for (uint32_t index = 0; index < locals->capacity; index++) {
780 pm_local_t *local = &locals->locals[index];
781
782 if (local->name != PM_CONSTANT_ID_UNSET) {
783 if (hash_needed) local->hash = pm_locals_hash(local->name);
784
785 uint32_t hash = local->hash;
786 while (next_locals[hash & mask].name != PM_CONSTANT_ID_UNSET) hash++;
787 next_locals[hash & mask] = *local;
788 }
789 }
790 }
791
792 pm_locals_free(locals);
793 locals->locals = next_locals;
794 locals->capacity = next_capacity;
795}
796
812static bool
813pm_locals_write(pm_locals_t *locals, pm_constant_id_t name, const uint8_t *start, const uint8_t *end, uint32_t reads) {
814 if (locals->size >= (locals->capacity / 4 * 3)) {
815 pm_locals_resize(locals);
816 }
817
818 if (locals->capacity < PM_LOCALS_HASH_THRESHOLD) {
819 for (uint32_t index = 0; index < locals->capacity; index++) {
820 pm_local_t *local = &locals->locals[index];
821
822 if (local->name == PM_CONSTANT_ID_UNSET) {
823 *local = (pm_local_t) {
824 .name = name,
825 .location = { .start = start, .end = end },
826 .index = locals->size++,
827 .reads = reads,
828 .hash = 0
829 };
830 return true;
831 } else if (local->name == name) {
832 return false;
833 }
834 }
835 } else {
836 uint32_t mask = locals->capacity - 1;
837 uint32_t hash = pm_locals_hash(name);
838 uint32_t initial_hash = hash;
839
840 do {
841 pm_local_t *local = &locals->locals[hash & mask];
842
843 if (local->name == PM_CONSTANT_ID_UNSET) {
844 *local = (pm_local_t) {
845 .name = name,
846 .location = { .start = start, .end = end },
847 .index = locals->size++,
848 .reads = reads,
849 .hash = initial_hash
850 };
851 return true;
852 } else if (local->name == name) {
853 return false;
854 } else {
855 hash++;
856 }
857 } while ((hash & mask) != initial_hash);
858 }
859
860 assert(false && "unreachable");
861 return true;
862}
863
868static uint32_t
869pm_locals_find(pm_locals_t *locals, pm_constant_id_t name) {
870 if (locals->capacity < PM_LOCALS_HASH_THRESHOLD) {
871 for (uint32_t index = 0; index < locals->size; index++) {
872 pm_local_t *local = &locals->locals[index];
873 if (local->name == name) return index;
874 }
875 } else {
876 uint32_t mask = locals->capacity - 1;
877 uint32_t hash = pm_locals_hash(name);
878 uint32_t initial_hash = hash & mask;
879
880 do {
881 pm_local_t *local = &locals->locals[hash & mask];
882
883 if (local->name == PM_CONSTANT_ID_UNSET) {
884 return UINT32_MAX;
885 } else if (local->name == name) {
886 return hash & mask;
887 } else {
888 hash++;
889 }
890 } while ((hash & mask) != initial_hash);
891 }
892
893 return UINT32_MAX;
894}
895
900static void
901pm_locals_read(pm_locals_t *locals, pm_constant_id_t name) {
902 uint32_t index = pm_locals_find(locals, name);
903 assert(index != UINT32_MAX);
904
905 pm_local_t *local = &locals->locals[index];
906 assert(local->reads < UINT32_MAX);
907
908 local->reads++;
909}
910
915static void
916pm_locals_unread(pm_locals_t *locals, pm_constant_id_t name) {
917 uint32_t index = pm_locals_find(locals, name);
918 assert(index != UINT32_MAX);
919
920 pm_local_t *local = &locals->locals[index];
921 assert(local->reads > 0);
922
923 local->reads--;
924}
925
929static uint32_t
930pm_locals_reads(pm_locals_t *locals, pm_constant_id_t name) {
931 uint32_t index = pm_locals_find(locals, name);
932 assert(index != UINT32_MAX);
933
934 return locals->locals[index].reads;
935}
936
945static void
946pm_locals_order(PRISM_ATTRIBUTE_UNUSED pm_parser_t *parser, pm_locals_t *locals, pm_constant_id_list_t *list, bool toplevel) {
947 pm_constant_id_list_init_capacity(list, locals->size);
948
949 // If we're still below the threshold for switching to a hash, then we only
950 // need to loop over the locals until we hit the size because the locals are
951 // stored in a list.
952 uint32_t capacity = locals->capacity < PM_LOCALS_HASH_THRESHOLD ? locals->size : locals->capacity;
953
954 // We will only warn for unused variables if we're not at the top level, or
955 // if we're parsing a file outside of eval or -e.
956 bool warn_unused = !toplevel || (!parser->parsing_eval && !PM_PARSER_COMMAND_LINE_OPTION_E(parser));
957
958 for (uint32_t index = 0; index < capacity; index++) {
959 pm_local_t *local = &locals->locals[index];
960
961 if (local->name != PM_CONSTANT_ID_UNSET) {
962 pm_constant_id_list_insert(list, (size_t) local->index, local->name);
963
964 if (warn_unused && local->reads == 0 && ((parser->start_line >= 0) || (pm_newline_list_line(&parser->newline_list, local->location.start, parser->start_line) >= 0))) {
965 pm_constant_t *constant = pm_constant_pool_id_to_constant(&parser->constant_pool, local->name);
966
967 if (constant->length >= 1 && *constant->start != '_') {
968 PM_PARSER_WARN_FORMAT(
969 parser,
970 local->location.start,
971 local->location.end,
972 PM_WARN_UNUSED_LOCAL_VARIABLE,
973 (int) constant->length,
974 (const char *) constant->start
975 );
976 }
977 }
978 }
979 }
980}
981
/******************************************************************************/
/* Node-related functions */
/******************************************************************************/
985
989static inline pm_constant_id_t
990pm_parser_constant_id_location(pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
991 return pm_constant_pool_insert_shared(&parser->constant_pool, start, (size_t) (end - start));
992}
993
997static inline pm_constant_id_t
998pm_parser_constant_id_owned(pm_parser_t *parser, uint8_t *start, size_t length) {
999 return pm_constant_pool_insert_owned(&parser->constant_pool, start, length);
1000}
1001
1005static inline pm_constant_id_t
1006pm_parser_constant_id_constant(pm_parser_t *parser, const char *start, size_t length) {
1007 return pm_constant_pool_insert_constant(&parser->constant_pool, (const uint8_t *) start, length);
1008}
1009
1013static inline pm_constant_id_t
1014pm_parser_constant_id_token(pm_parser_t *parser, const pm_token_t *token) {
1015 return pm_parser_constant_id_location(parser, token->start, token->end);
1016}
1017
1022static inline pm_constant_id_t
1023pm_parser_optional_constant_id_token(pm_parser_t *parser, const pm_token_t *token) {
1024 return token->type == PM_TOKEN_NOT_PROVIDED ? 0 : pm_parser_constant_id_token(parser, token);
1025}
1026
1032static pm_node_t *
1033pm_check_value_expression(pm_parser_t *parser, pm_node_t *node) {
1034 pm_node_t *void_node = NULL;
1035
1036 while (node != NULL) {
1037 switch (PM_NODE_TYPE(node)) {
1038 case PM_RETURN_NODE:
1039 case PM_BREAK_NODE:
1040 case PM_NEXT_NODE:
1041 case PM_REDO_NODE:
1042 case PM_RETRY_NODE:
1044 return void_node != NULL ? void_node : node;
1046 return NULL;
1047 case PM_BEGIN_NODE: {
1048 pm_begin_node_t *cast = (pm_begin_node_t *) node;
1049
1050 if (cast->ensure_clause != NULL) {
1051 if (cast->rescue_clause != NULL) {
1052 pm_node_t *vn = pm_check_value_expression(parser, (pm_node_t *) cast->rescue_clause);
1053 if (vn != NULL) return vn;
1054 }
1055
1056 if (cast->statements != NULL) {
1057 pm_node_t *vn = pm_check_value_expression(parser, (pm_node_t *) cast->statements);
1058 if (vn != NULL) return vn;
1059 }
1060
1061 node = (pm_node_t *) cast->ensure_clause;
1062 } else if (cast->rescue_clause != NULL) {
1063 if (cast->statements == NULL) return NULL;
1064
1065 pm_node_t *vn = pm_check_value_expression(parser, (pm_node_t *) cast->statements);
1066 if (vn == NULL) return NULL;
1067 if (void_node == NULL) void_node = vn;
1068
1069 for (pm_rescue_node_t *rescue_clause = cast->rescue_clause; rescue_clause != NULL; rescue_clause = rescue_clause->subsequent) {
1070 pm_node_t *vn = pm_check_value_expression(parser, (pm_node_t *) rescue_clause->statements);
1071 if (vn == NULL) {
1072 void_node = NULL;
1073 break;
1074 }
1075 if (void_node == NULL) {
1076 void_node = vn;
1077 }
1078 }
1079
1080 if (cast->else_clause != NULL) {
1081 node = (pm_node_t *) cast->else_clause;
1082 } else {
1083 return void_node;
1084 }
1085 } else {
1086 node = (pm_node_t *) cast->statements;
1087 }
1088
1089 break;
1090 }
1091 case PM_ENSURE_NODE: {
1092 pm_ensure_node_t *cast = (pm_ensure_node_t *) node;
1093 node = (pm_node_t *) cast->statements;
1094 break;
1095 }
1096 case PM_PARENTHESES_NODE: {
1098 node = (pm_node_t *) cast->body;
1099 break;
1100 }
1101 case PM_STATEMENTS_NODE: {
1103 node = cast->body.nodes[cast->body.size - 1];
1104 break;
1105 }
1106 case PM_IF_NODE: {
1107 pm_if_node_t *cast = (pm_if_node_t *) node;
1108 if (cast->statements == NULL || cast->subsequent == NULL) {
1109 return NULL;
1110 }
1111 pm_node_t *vn = pm_check_value_expression(parser, (pm_node_t *) cast->statements);
1112 if (vn == NULL) {
1113 return NULL;
1114 }
1115 if (void_node == NULL) {
1116 void_node = vn;
1117 }
1118 node = cast->subsequent;
1119 break;
1120 }
1121 case PM_UNLESS_NODE: {
1122 pm_unless_node_t *cast = (pm_unless_node_t *) node;
1123 if (cast->statements == NULL || cast->else_clause == NULL) {
1124 return NULL;
1125 }
1126 pm_node_t *vn = pm_check_value_expression(parser, (pm_node_t *) cast->statements);
1127 if (vn == NULL) {
1128 return NULL;
1129 }
1130 if (void_node == NULL) {
1131 void_node = vn;
1132 }
1133 node = (pm_node_t *) cast->else_clause;
1134 break;
1135 }
1136 case PM_ELSE_NODE: {
1137 pm_else_node_t *cast = (pm_else_node_t *) node;
1138 node = (pm_node_t *) cast->statements;
1139 break;
1140 }
1141 case PM_AND_NODE: {
1142 pm_and_node_t *cast = (pm_and_node_t *) node;
1143 node = cast->left;
1144 break;
1145 }
1146 case PM_OR_NODE: {
1147 pm_or_node_t *cast = (pm_or_node_t *) node;
1148 node = cast->left;
1149 break;
1150 }
1153
1154 pm_scope_t *scope = parser->current_scope;
1155 for (uint32_t depth = 0; depth < cast->depth; depth++) scope = scope->previous;
1156
1157 pm_locals_read(&scope->locals, cast->name);
1158 return NULL;
1159 }
1160 default:
1161 return NULL;
1162 }
1163 }
1164
1165 return NULL;
1166}
1167
1168static inline void
1169pm_assert_value_expression(pm_parser_t *parser, pm_node_t *node) {
1170 pm_node_t *void_node = pm_check_value_expression(parser, node);
1171 if (void_node != NULL) {
1172 pm_parser_err_node(parser, void_node, PM_ERR_VOID_EXPRESSION);
1173 }
1174}
1175
/**
 * Emit a PM_WARN_VOID_STATEMENT warning for `node` when its type matches one
 * of the cases in the switch below. `type` is the text describing the node in
 * the warning and `length` is passed alongside it; in every assignment below
 * `length` equals the number of characters of `type` that are meant to be
 * used. If no case sets `type`, nothing is reported.
 *
 * NOTE(review): in this copy several assignments are not preceded by a case
 * label and one `} else {` has no visible `if`; the embedded line numbers
 * skip at those points, so lines appear to be missing. Confirm against the
 * upstream file before relying on the control flow shown here.
 */
1179static void
1180pm_void_statement_check(pm_parser_t *parser, const pm_node_t *node) {
1181 const char *type = NULL;
1182 int length = 0;
1183
1184 switch (PM_NODE_TYPE(node)) {
// NOTE(review): case label(s) for this branch are not visible in this copy.
1191 type = "a variable";
1192 length = 10;
1193 break;
1194 case PM_CALL_NODE: {
1195 const pm_call_node_t *cast = (const pm_call_node_t *) node;
// Calls with a call operator, or without a message location, are skipped.
1196 if (cast->call_operator_loc.start != NULL || cast->message_loc.start == NULL) break;
1197
// Look up the method name and warn only for the operator names matched
// below, dispatching first on the name's length.
1198 const pm_constant_t *message = pm_constant_pool_id_to_constant(&parser->constant_pool, cast->name);
1199 switch (message->length) {
1200 case 1:
1201 switch (message->start[0]) {
1202 case '+':
1203 case '-':
1204 case '*':
1205 case '/':
1206 case '%':
1207 case '|':
1208 case '^':
1209 case '&':
1210 case '>':
1211 case '<':
// The name bytes themselves are used as the text, bounded by `length`.
1212 type = (const char *) message->start;
1213 length = 1;
1214 break;
1215 }
1216 break;
1217 case 2:
// Two-character names are classified by their second character.
1218 switch (message->start[1]) {
1219 case '=':
1220 if (message->start[0] == '<' || message->start[0] == '>' || message->start[0] == '!' || message->start[0] == '=') {
1221 type = (const char *) message->start;
1222 length = 2;
1223 }
1224 break;
1225 case '@':
1226 if (message->start[0] == '+' || message->start[0] == '-') {
1227 type = (const char *) message->start;
1228 length = 2;
1229 }
1230 break;
1231 case '*':
1232 if (message->start[0] == '*') {
1233 type = (const char *) message->start;
1234 length = 2;
1235 }
1236 break;
1237 }
1238 break;
1239 case 3:
1240 if (memcmp(message->start, "<=>", 3) == 0) {
1241 type = "<=>";
1242 length = 3;
1243 }
1244 break;
1245 }
1246
1247 break;
1248 }
// NOTE(review): case label for this branch is not visible in this copy.
1250 type = "::";
1251 length = 2;
1252 break;
// NOTE(review): case label for this branch is not visible in this copy.
1254 type = "a constant";
1255 length = 10;
1256 break;
1257 case PM_DEFINED_NODE:
1258 type = "defined?";
1259 length = 8;
1260 break;
1261 case PM_FALSE_NODE:
1262 type = "false";
1263 length = 5;
1264 break;
// NOTE(review): the embedded numbering skips inside this label list, so some
// labels may be missing from this copy.
1265 case PM_FLOAT_NODE:
1266 case PM_IMAGINARY_NODE:
1267 case PM_INTEGER_NODE:
1270 case PM_RATIONAL_NODE:
1275 case PM_STRING_NODE:
1276 case PM_SYMBOL_NODE:
1277 type = "a literal";
1278 length = 9;
1279 break;
1280 case PM_NIL_NODE:
1281 type = "nil";
1282 length = 3;
1283 break;
1284 case PM_RANGE_NODE: {
1285 const pm_range_node_t *cast = (const pm_range_node_t *) node;
1286
// NOTE(review): the `if (...) {` header that selects between "..." and ".."
// is not visible in this copy.
1288 type = "...";
1289 length = 3;
1290 } else {
1291 type = "..";
1292 length = 2;
1293 }
1294
1295 break;
1296 }
1297 case PM_SELF_NODE:
1298 type = "self";
1299 length = 4;
1300 break;
1301 case PM_TRUE_NODE:
1302 type = "true";
1303 length = 4;
1304 break;
1305 default:
1306 break;
1307 }
1308
// Only report when one of the cases above chose a description.
1309 if (type != NULL) {
1310 PM_PARSER_WARN_NODE_FORMAT(parser, node, PM_WARN_VOID_STATEMENT, length, type);
1311 }
1312}
1313
1318static void
1319pm_void_statements_check(pm_parser_t *parser, const pm_statements_node_t *node, bool last_value) {
1320 assert(node->body.size > 0);
1321 const size_t size = node->body.size - (last_value ? 1 : 0);
1322 for (size_t index = 0; index < size; index++) {
1323 pm_void_statement_check(parser, node->body.nodes[index]);
1324 }
1325}
1326
/**
 * The context in which a predicate is being checked. The warning helper
 * pm_parser_warn_conditional_predicate_literal() words its message as
 * "condition" or "flip-flop" for the first two values and emits nothing for
 * the third.
 */
1332typedef enum {
// Checked as an ordinary condition.
1333 PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL,
// Checked as an endpoint of a range used as a flip-flop.
1334 PM_CONDITIONAL_PREDICATE_TYPE_FLIP_FLOP,
// No literal warning is produced for this context.
1335 PM_CONDITIONAL_PREDICATE_TYPE_NOT
1336} pm_conditional_predicate_type_t;
1337
1341static void
1342pm_parser_warn_conditional_predicate_literal(pm_parser_t *parser, pm_node_t *node, pm_conditional_predicate_type_t type, pm_diagnostic_id_t diag_id, const char *prefix) {
1343 switch (type) {
1344 case PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL:
1345 PM_PARSER_WARN_NODE_FORMAT(parser, node, diag_id, prefix, "condition");
1346 break;
1347 case PM_CONDITIONAL_PREDICATE_TYPE_FLIP_FLOP:
1348 PM_PARSER_WARN_NODE_FORMAT(parser, node, diag_id, prefix, "flip-flop");
1349 break;
1350 case PM_CONDITIONAL_PREDICATE_TYPE_NOT:
1351 break;
1352 }
1353}
1354
/**
 * Return true when `node` is one of the literal node types listed at the
 * bottom of the switch, or an array/hash that is either flagged as a static
 * literal or whose elements all satisfy this same predicate recursively.
 */
1359static bool
1360pm_conditional_predicate_warn_write_literal_p(const pm_node_t *node) {
1361 switch (PM_NODE_TYPE(node)) {
1362 case PM_ARRAY_NODE: {
// A static-literal array qualifies without inspecting its elements.
1363 if (PM_NODE_FLAG_P(node, PM_NODE_FLAG_STATIC_LITERAL)) return true;
1364
// Otherwise every element must itself qualify.
1365 const pm_array_node_t *cast = (const pm_array_node_t *) node;
1366 for (size_t index = 0; index < cast->elements.size; index++) {
1367 if (!pm_conditional_predicate_warn_write_literal_p(cast->elements.nodes[index])) return false;
1368 }
1369
1370 return true;
1371 }
1372 case PM_HASH_NODE: {
1373 if (PM_NODE_FLAG_P(node, PM_NODE_FLAG_STATIC_LITERAL)) return true;
1374
1375 const pm_hash_node_t *cast = (const pm_hash_node_t *) node;
1376 for (size_t index = 0; index < cast->elements.size; index++) {
1377 const pm_node_t *element = cast->elements.nodes[index];
// Any element that is not a plain key/value association disqualifies the hash.
1378 if (!PM_NODE_TYPE_P(element, PM_ASSOC_NODE)) return false;
1379
// Both the key and the value must qualify.
1380 const pm_assoc_node_t *assoc = (const pm_assoc_node_t *) element;
1381 if (!pm_conditional_predicate_warn_write_literal_p(assoc->key) || !pm_conditional_predicate_warn_write_literal_p(assoc->value)) return false;
1382 }
1383
1384 return true;
1385 }
// NOTE(review): the embedded numbering jumps from 1391 to 1396 inside this
// label list, so some case labels may be missing from this copy — confirm.
1386 case PM_FALSE_NODE:
1387 case PM_FLOAT_NODE:
1388 case PM_IMAGINARY_NODE:
1389 case PM_INTEGER_NODE:
1390 case PM_NIL_NODE:
1391 case PM_RATIONAL_NODE:
1396 case PM_STRING_NODE:
1397 case PM_SYMBOL_NODE:
1398 case PM_TRUE_NODE:
1399 return true;
1400 default:
1401 return false;
1402 }
1403}
1404
1409static inline void
1410pm_conditional_predicate_warn_write_literal(pm_parser_t *parser, const pm_node_t *node) {
1411 if (pm_conditional_predicate_warn_write_literal_p(node)) {
1412 pm_parser_warn_node(parser, node, parser->version == PM_OPTIONS_VERSION_CRUBY_3_3 ? PM_WARN_EQUAL_IN_CONDITIONAL_3_3 : PM_WARN_EQUAL_IN_CONDITIONAL);
1413 }
1414}
1415
/**
 * Inspect a node that is being used as a predicate. Depending on the node's
 * type this recurses into its operands, emits literal-in-condition warnings
 * through the helpers above, or retags the node (a range becomes a flip-flop).
 * `type` is forwarded to the warning helper to select its wording.
 *
 * NOTE(review): in this copy many branches are not preceded by a case label,
 * and the declaration of `cast` in the parentheses branch is not visible; the
 * embedded line numbers skip at those points, so lines appear to be missing.
 * Confirm against the upstream file.
 */
1428static void
1429pm_conditional_predicate(pm_parser_t *parser, pm_node_t *node, pm_conditional_predicate_type_t type) {
1430 switch (PM_NODE_TYPE(node)) {
1431 case PM_AND_NODE: {
// Both operands are checked as ordinary conditions, regardless of `type`.
1432 pm_and_node_t *cast = (pm_and_node_t *) node;
1433 pm_conditional_predicate(parser, cast->left, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
1434 pm_conditional_predicate(parser, cast->right, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
1435 break;
1436 }
1437 case PM_OR_NODE: {
1438 pm_or_node_t *cast = (pm_or_node_t *) node;
1439 pm_conditional_predicate(parser, cast->left, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
1440 pm_conditional_predicate(parser, cast->right, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
1441 break;
1442 }
1443 case PM_PARENTHESES_NODE: {
// NOTE(review): the declaration of `cast` is not visible in this copy.
1445
// Only a body consisting of exactly one statement is looked through; the
// caller's `type` is preserved.
1446 if ((cast->body != NULL) && PM_NODE_TYPE_P(cast->body, PM_STATEMENTS_NODE)) {
1447 pm_statements_node_t *statements = (pm_statements_node_t *) cast->body;
1448 if (statements->body.size == 1) pm_conditional_predicate(parser, statements->body.nodes[0], type);
1449 }
1450
1451 break;
1452 }
1453 case PM_BEGIN_NODE: {
// Same single-statement look-through as for parentheses.
1454 pm_begin_node_t *cast = (pm_begin_node_t *) node;
1455 if (cast->statements != NULL) {
1456 pm_statements_node_t *statements = cast->statements;
1457 if (statements->body.size == 1) pm_conditional_predicate(parser, statements->body.nodes[0], type);
1458 }
1459 break;
1460 }
1461 case PM_RANGE_NODE: {
1462 pm_range_node_t *cast = (pm_range_node_t *) node;
1463
// Each endpoint that is present is checked in flip-flop context.
1464 if (cast->left != NULL) pm_conditional_predicate(parser, cast->left, PM_CONDITIONAL_PREDICATE_TYPE_FLIP_FLOP);
1465 if (cast->right != NULL) pm_conditional_predicate(parser, cast->right, PM_CONDITIONAL_PREDICATE_TYPE_FLIP_FLOP);
1466
1467 // Here we change the range node into a flip flop node. We can do
1468 // this since the nodes are exactly the same except for the type.
1469 // We're only asserting against the size when we should probably
1470 // assert against the entire layout, but we'll assume tests will
1471 // catch this.
1472 assert(sizeof(pm_range_node_t) == sizeof(pm_flip_flop_node_t));
1473 node->type = PM_FLIP_FLOP_NODE;
1474
1475 break;
1476 }
// NOTE(review): the case label and the retagging statements described by the
// next comment are not visible in this copy.
1478 // Here we change the regular expression node into a match last line
1479 // node. We can do this since the nodes are exactly the same except
1480 // for the type.
1483
// The warning is suppressed when PM_PARSER_COMMAND_LINE_OPTION_E holds.
1484 if (!PM_PARSER_COMMAND_LINE_OPTION_E(parser)) {
1485 pm_parser_warn_conditional_predicate_literal(parser, node, type, PM_WARN_LITERAL_IN_CONDITION_DEFAULT, "regex ");
1486 }
1487
1488 break;
// NOTE(review): the case label and the retagging statements described by the
// next comment are not visible in this copy.
1490 // Here we change the interpolated regular expression node into an
1491 // interpolated match last line node. We can do this since the nodes
1492 // are exactly the same except for the type.
1495
1496 if (!PM_PARSER_COMMAND_LINE_OPTION_E(parser)) {
1497 pm_parser_warn_conditional_predicate_literal(parser, node, type, PM_WARN_LITERAL_IN_CONDITION_VERBOSE, "regex ");
1498 }
1499
1500 break;
1501 case PM_INTEGER_NODE:
// Integers get a dedicated warning in flip-flop context and the generic
// literal warning otherwise.
1502 if (type == PM_CONDITIONAL_PREDICATE_TYPE_FLIP_FLOP) {
1503 if (!PM_PARSER_COMMAND_LINE_OPTION_E(parser)) {
1504 pm_parser_warn_node(parser, node, PM_WARN_INTEGER_IN_FLIP_FLOP);
1505 }
1506 } else {
1507 pm_parser_warn_conditional_predicate_literal(parser, node, type, PM_WARN_LITERAL_IN_CONDITION_VERBOSE, "");
1508 }
1509 break;
// NOTE(review): the embedded numbering skips after this label, so further
// labels sharing this branch may be missing from this copy.
1510 case PM_STRING_NODE:
1513 pm_parser_warn_conditional_predicate_literal(parser, node, type, PM_WARN_LITERAL_IN_CONDITION_DEFAULT, "string ");
1514 break;
1515 case PM_SYMBOL_NODE:
1517 pm_parser_warn_conditional_predicate_literal(parser, node, type, PM_WARN_LITERAL_IN_CONDITION_VERBOSE, "symbol ");
1518 break;
1521 case PM_FLOAT_NODE:
1522 case PM_RATIONAL_NODE:
1523 case PM_IMAGINARY_NODE:
1524 pm_parser_warn_conditional_predicate_literal(parser, node, type, PM_WARN_LITERAL_IN_CONDITION_VERBOSE, "");
1525 break;
// The remaining branches each pass the written value of a write node to
// pm_conditional_predicate_warn_write_literal().
// NOTE(review): their case labels are not visible in this copy; the casts
// indicate which write-node type each branch expects.
1527 pm_conditional_predicate_warn_write_literal(parser, ((pm_class_variable_write_node_t *) node)->value);
1528 break;
1530 pm_conditional_predicate_warn_write_literal(parser, ((pm_constant_write_node_t *) node)->value);
1531 break;
1533 pm_conditional_predicate_warn_write_literal(parser, ((pm_global_variable_write_node_t *) node)->value);
1534 break;
1536 pm_conditional_predicate_warn_write_literal(parser, ((pm_instance_variable_write_node_t *) node)->value);
1537 break;
1539 pm_conditional_predicate_warn_write_literal(parser, ((pm_local_variable_write_node_t *) node)->value);
1540 break;
1542 pm_conditional_predicate_warn_write_literal(parser, ((pm_multi_write_node_t *) node)->value);
1543 break;
1544 default:
1545 break;
1546 }
1547}
1548
1557static inline pm_token_t
1558not_provided(pm_parser_t *parser) {
1559 return (pm_token_t) { .type = PM_TOKEN_NOT_PROVIDED, .start = parser->start, .end = parser->start };
1560}
1561
// Helpers that build pm_location_t values as compound literals.

// An empty location: start and end both equal the parser's `start`.
1562#define PM_LOCATION_NULL_VALUE(parser) ((pm_location_t) { .start = (parser)->start, .end = (parser)->start })
// The location spanned by a token (its `start` and `end`).
1563#define PM_LOCATION_TOKEN_VALUE(token) ((pm_location_t) { .start = (token)->start, .end = (token)->end })
// A copy of a node's location, read through `(node)->location`.
1564#define PM_LOCATION_NODE_VALUE(node) ((pm_location_t) { .start = (node)->location.start, .end = (node)->location.end })
// A copy of a node's location, read through `(node)->base.location`.
1565#define PM_LOCATION_NODE_BASE_VALUE(node) ((pm_location_t) { .start = (node)->base.location.start, .end = (node)->base.location.end })
// The "absent" location: both pointers are NULL.
1566#define PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE ((pm_location_t) { .start = NULL, .end = NULL })
// The token's location, or the "absent" location for a PM_TOKEN_NOT_PROVIDED
// token. Note that `token` is evaluated more than once.
1567#define PM_OPTIONAL_LOCATION_TOKEN_VALUE(token) ((token)->type == PM_TOKEN_NOT_PROVIDED ? PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE : PM_LOCATION_TOKEN_VALUE(token))
1568
1591
1595static inline const uint8_t *
1596pm_arguments_end(pm_arguments_t *arguments) {
1597 if (arguments->block != NULL) {
1598 const uint8_t *end = arguments->block->location.end;
1599 if (arguments->closing_loc.start != NULL && arguments->closing_loc.end > end) {
1600 end = arguments->closing_loc.end;
1601 }
1602 return end;
1603 }
1604 if (arguments->closing_loc.start != NULL) {
1605 return arguments->closing_loc.end;
1606 }
1607 if (arguments->arguments != NULL) {
1608 return arguments->arguments->base.location.end;
1609 }
1610 return arguments->closing_loc.end;
1611}
1612
1617static void
1618pm_arguments_validate_block(pm_parser_t *parser, pm_arguments_t *arguments, pm_block_node_t *block) {
1619 // First, check that we have arguments and that we don't have a closing
1620 // location for them.
1621 if (arguments->arguments == NULL || arguments->closing_loc.start != NULL) {
1622 return;
1623 }
1624
1625 // Next, check that we don't have a single parentheses argument. This would
1626 // look like:
1627 //
1628 // foo (1) {}
1629 //
1630 // In this case, it's actually okay for the block to be attached to the
1631 // call, even though it looks like it's attached to the argument.
1632 if (arguments->arguments->arguments.size == 1 && PM_NODE_TYPE_P(arguments->arguments->arguments.nodes[0], PM_PARENTHESES_NODE)) {
1633 return;
1634 }
1635
1636 // If we didn't hit a case before this check, then at this point we need to
1637 // add a syntax error.
1638 pm_parser_err_node(parser, (pm_node_t *) block, PM_ERR_ARGUMENT_UNEXPECTED_BLOCK);
1639}
1640
1641/******************************************************************************/
1642/* Basic character checks */
1643/******************************************************************************/
1644
1651static inline size_t
1652char_is_identifier_start(const pm_parser_t *parser, const uint8_t *b) {
1653 if (parser->encoding_changed) {
1654 size_t width;
1655 if ((width = parser->encoding->alpha_char(b, parser->end - b)) != 0) {
1656 return width;
1657 } else if (*b == '_') {
1658 return 1;
1659 } else if (*b >= 0x80) {
1660 return parser->encoding->char_width(b, parser->end - b);
1661 } else {
1662 return 0;
1663 }
1664 } else if (*b < 0x80) {
1665 return (pm_encoding_unicode_table[*b] & PRISM_ENCODING_ALPHABETIC_BIT ? 1 : 0) || (*b == '_');
1666 } else {
1667 return pm_encoding_utf_8_char_width(b, parser->end - b);
1668 }
1669}
1670
1675static inline size_t
1676char_is_identifier_utf8(const uint8_t *b, const uint8_t *end) {
1677 if (*b < 0x80) {
1678 return (*b == '_') || (pm_encoding_unicode_table[*b] & PRISM_ENCODING_ALPHANUMERIC_BIT ? 1 : 0);
1679 } else {
1680 return pm_encoding_utf_8_char_width(b, end - b);
1681 }
1682}
1683
1689static inline size_t
1690char_is_identifier(const pm_parser_t *parser, const uint8_t *b) {
1691 if (parser->encoding_changed) {
1692 size_t width;
1693 if ((width = parser->encoding->alnum_char(b, parser->end - b)) != 0) {
1694 return width;
1695 } else if (*b == '_') {
1696 return 1;
1697 } else if (*b >= 0x80) {
1698 return parser->encoding->char_width(b, parser->end - b);
1699 } else {
1700 return 0;
1701 }
1702 }
1703 return char_is_identifier_utf8(b, parser->end);
1704}
1705
// Bitmap of the punctuation characters that may directly follow a `$` in a
// global variable name. Printable ASCII from 0x20 to 0x7e is split into three
// 32-character words; BIT contributes the bit for character `ch` when that
// character belongs to word `word`, and PUNCT ORs together the bits of every
// accepted character for one word.
#define BIT(ch, word) ((((ch) / 32) - 1 == (word)) ? (1U << ((ch) % 32)) : 0)
#define PUNCT(word) ( \
    BIT('~', word)  | BIT('*', word) | BIT('$', word)  | BIT('?', word)  | \
    BIT('!', word)  | BIT('@', word) | BIT('/', word)  | BIT('\\', word) | \
    BIT(';', word)  | BIT(',', word) | BIT('.', word)  | BIT('=', word)  | \
    BIT(':', word)  | BIT('<', word) | BIT('>', word)  | BIT('\"', word) | \
    BIT('&', word)  | BIT('`', word) | BIT('\'', word) | BIT('+', word)  | \
    BIT('0', word))

const unsigned int pm_global_name_punctuation_hash[(0x7e - 0x20 + 31) / 32] = {
    PUNCT(0),
    PUNCT(1),
    PUNCT(2)
};

#undef BIT
#undef PUNCT

/**
 * Return true if byte `b` is one of the punctuation characters recorded in
 * pm_global_name_punctuation_hash.
 */
static inline bool
char_is_global_name_punctuation(const uint8_t b) {
    // Only 0x21..0x7e can be present in the table.
    if (b <= 0x20 || b > 0x7e) return false;

    const unsigned int code = b;
    const unsigned int word = pm_global_name_punctuation_hash[(code - 0x20) / 32];

    return ((word >> (code % 32)) & 1U) != 0;
}
1730
1731static inline bool
1732token_is_setter_name(pm_token_t *token) {
1733 return (
1734 (token->type == PM_TOKEN_IDENTIFIER) &&
1735 (token->end - token->start >= 2) &&
1736 (token->end[-1] == '=')
1737 );
1738}
1739
/**
 * Return true if the first `length` bytes of `source` spell exactly one of
 * the keywords in the table below. `source` does not need to be
 * NUL-terminated; only `length` bytes are ever compared.
 */
static bool
pm_local_is_keyword(const char *source, size_t length) {
    static const struct {
        size_t length;
        const char *text;
    } keywords[] = {
        { 2, "do" }, { 2, "if" }, { 2, "in" }, { 2, "or" },

        { 3, "and" }, { 3, "def" }, { 3, "end" }, { 3, "for" }, { 3, "nil" },
        { 3, "not" },

        { 4, "case" }, { 4, "else" }, { 4, "next" }, { 4, "redo" },
        { 4, "self" }, { 4, "then" }, { 4, "true" }, { 4, "when" },

        { 5, "alias" }, { 5, "begin" }, { 5, "break" }, { 5, "class" },
        { 5, "elsif" }, { 5, "false" }, { 5, "retry" }, { 5, "super" },
        { 5, "undef" }, { 5, "until" }, { 5, "while" }, { 5, "yield" },

        { 6, "ensure" }, { 6, "module" }, { 6, "rescue" }, { 6, "return" },
        { 6, "unless" },

        { 8, "__LINE__" }, { 8, "__FILE__" },

        { 12, "__ENCODING__" }
    };

    const size_t count = sizeof(keywords) / sizeof(keywords[0]);

    for (size_t index = 0; index < count; index++) {
        // Bytes are only read once the lengths agree, so `source` is never
        // touched beyond `length`.
        if (keywords[index].length != length) continue;
        if (memcmp(source, keywords[index].text, length) == 0) return true;
    }

    return false;
}
1810
1811/******************************************************************************/
1812/* Node flag handling functions */
1813/******************************************************************************/
1814
1818static inline void
1819pm_node_flag_set(pm_node_t *node, pm_node_flags_t flag) {
1820 node->flags |= flag;
1821}
1822
1826static inline void
1827pm_node_flag_unset(pm_node_t *node, pm_node_flags_t flag) {
1828 node->flags &= (pm_node_flags_t) ~flag;
1829}
1830
1834static inline void
1835pm_node_flag_set_repeated_parameter(pm_node_t *node) {
1836 assert(PM_NODE_TYPE(node) == PM_BLOCK_LOCAL_VARIABLE_NODE ||
1837 PM_NODE_TYPE(node) == PM_BLOCK_PARAMETER_NODE ||
1838 PM_NODE_TYPE(node) == PM_KEYWORD_REST_PARAMETER_NODE ||
1839 PM_NODE_TYPE(node) == PM_OPTIONAL_KEYWORD_PARAMETER_NODE ||
1840 PM_NODE_TYPE(node) == PM_OPTIONAL_PARAMETER_NODE ||
1841 PM_NODE_TYPE(node) == PM_REQUIRED_KEYWORD_PARAMETER_NODE ||
1842 PM_NODE_TYPE(node) == PM_REQUIRED_PARAMETER_NODE ||
1843 PM_NODE_TYPE(node) == PM_REST_PARAMETER_NODE);
1844
1845 pm_node_flag_set(node, PM_PARAMETER_FLAGS_REPEATED_PARAMETER);
1846}
1847
1848/******************************************************************************/
1849/* Node creation functions */
1850/******************************************************************************/
1851
1857#define PM_REGULAR_EXPRESSION_ENCODING_MASK ~(PM_REGULAR_EXPRESSION_FLAGS_EUC_JP | PM_REGULAR_EXPRESSION_FLAGS_ASCII_8BIT | PM_REGULAR_EXPRESSION_FLAGS_WINDOWS_31J | PM_REGULAR_EXPRESSION_FLAGS_UTF_8)
1858
1862static inline pm_node_flags_t
1863pm_regular_expression_flags_create(pm_parser_t *parser, const pm_token_t *closing) {
1864 pm_node_flags_t flags = 0;
1865
1866 if (closing->type == PM_TOKEN_REGEXP_END) {
1867 pm_buffer_t unknown_flags = { 0 };
1868
1869 for (const uint8_t *flag = closing->start + 1; flag < closing->end; flag++) {
1870 switch (*flag) {
1871 case 'i': flags |= PM_REGULAR_EXPRESSION_FLAGS_IGNORE_CASE; break;
1872 case 'm': flags |= PM_REGULAR_EXPRESSION_FLAGS_MULTI_LINE; break;
1873 case 'x': flags |= PM_REGULAR_EXPRESSION_FLAGS_EXTENDED; break;
1874 case 'o': flags |= PM_REGULAR_EXPRESSION_FLAGS_ONCE; break;
1875
1876 case 'e': flags = (pm_node_flags_t) (((pm_node_flags_t) (flags & PM_REGULAR_EXPRESSION_ENCODING_MASK)) | PM_REGULAR_EXPRESSION_FLAGS_EUC_JP); break;
1877 case 'n': flags = (pm_node_flags_t) (((pm_node_flags_t) (flags & PM_REGULAR_EXPRESSION_ENCODING_MASK)) | PM_REGULAR_EXPRESSION_FLAGS_ASCII_8BIT); break;
1878 case 's': flags = (pm_node_flags_t) (((pm_node_flags_t) (flags & PM_REGULAR_EXPRESSION_ENCODING_MASK)) | PM_REGULAR_EXPRESSION_FLAGS_WINDOWS_31J); break;
1879 case 'u': flags = (pm_node_flags_t) (((pm_node_flags_t) (flags & PM_REGULAR_EXPRESSION_ENCODING_MASK)) | PM_REGULAR_EXPRESSION_FLAGS_UTF_8); break;
1880
1881 default: pm_buffer_append_byte(&unknown_flags, *flag);
1882 }
1883 }
1884
1885 size_t unknown_flags_length = pm_buffer_length(&unknown_flags);
1886 if (unknown_flags_length != 0) {
1887 const char *word = unknown_flags_length >= 2 ? "options" : "option";
1888 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, PM_ERR_REGEXP_UNKNOWN_OPTIONS, word, unknown_flags_length, pm_buffer_value(&unknown_flags));
1889 }
1890 pm_buffer_free(&unknown_flags);
1891 }
1892
1893 return flags;
1894}
1895
1896#undef PM_REGULAR_EXPRESSION_ENCODING_MASK
1897
// Forward declarations of statements-node helpers; their definitions are not
// in this part of the file.

// Returns a pointer to a pm_statements_node_t for `parser`.
1898static pm_statements_node_t *
1899pm_statements_node_create(pm_parser_t *parser);
1900
// Takes a statements node, a statement node and a `newline` flag
// (presumably appends `statement` to `node`'s body — confirm at the definition).
1901static void
1902pm_statements_node_body_append(pm_parser_t *parser, pm_statements_node_t *node, pm_node_t *statement, bool newline);
1903
// Returns a size_t for `node` (presumably the number of statements in its
// body — confirm at the definition).
1904static size_t
1905pm_statements_node_body_length(pm_statements_node_t *node);
1906
1911static inline void *
1912pm_node_alloc(PRISM_ATTRIBUTE_UNUSED pm_parser_t *parser, size_t size) {
1913 void *memory = xcalloc(1, size);
1914 if (memory == NULL) {
1915 fprintf(stderr, "Failed to allocate %d bytes\n", (int) size);
1916 abort();
1917 }
1918 return memory;
1919}
1920
// Allocate storage for one node of the given `type` through pm_node_alloc()
// and cast the result to `type *`. The expansion is fully parenthesized so it
// can be used inside larger expressions.
#define PM_NODE_ALLOC(parser, type) ((type *) pm_node_alloc((parser), sizeof(type)))

// Pre-increment the parser's `node_id` counter and yield the new value. The
// argument is parenthesized so that any pointer expression (for example
// `&state`) can be passed.
#define PM_NODE_IDENTIFY(parser) (++(parser)->node_id)
1923
1927static pm_missing_node_t *
1928pm_missing_node_create(pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
1929 pm_missing_node_t *node = PM_NODE_ALLOC(parser, pm_missing_node_t);
1930
1931 *node = (pm_missing_node_t) {{
1932 .type = PM_MISSING_NODE,
1933 .node_id = PM_NODE_IDENTIFY(parser),
1934 .location = { .start = start, .end = end }
1935 }};
1936
1937 return node;
1938}
1939
1943static pm_alias_global_variable_node_t *
1944pm_alias_global_variable_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_node_t *new_name, pm_node_t *old_name) {
1945 assert(keyword->type == PM_TOKEN_KEYWORD_ALIAS);
1946 pm_alias_global_variable_node_t *node = PM_NODE_ALLOC(parser, pm_alias_global_variable_node_t);
1947
1948 *node = (pm_alias_global_variable_node_t) {
1949 {
1950 .type = PM_ALIAS_GLOBAL_VARIABLE_NODE,
1951 .node_id = PM_NODE_IDENTIFY(parser),
1952 .location = {
1953 .start = keyword->start,
1954 .end = old_name->location.end
1955 },
1956 },
1957 .new_name = new_name,
1958 .old_name = old_name,
1959 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword)
1960 };
1961
1962 return node;
1963}
1964
1968static pm_alias_method_node_t *
1969pm_alias_method_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_node_t *new_name, pm_node_t *old_name) {
1970 assert(keyword->type == PM_TOKEN_KEYWORD_ALIAS);
1971 pm_alias_method_node_t *node = PM_NODE_ALLOC(parser, pm_alias_method_node_t);
1972
1973 *node = (pm_alias_method_node_t) {
1974 {
1975 .type = PM_ALIAS_METHOD_NODE,
1976 .node_id = PM_NODE_IDENTIFY(parser),
1977 .location = {
1978 .start = keyword->start,
1979 .end = old_name->location.end
1980 },
1981 },
1982 .new_name = new_name,
1983 .old_name = old_name,
1984 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword)
1985 };
1986
1987 return node;
1988}
1989
1993static pm_alternation_pattern_node_t *
1994pm_alternation_pattern_node_create(pm_parser_t *parser, pm_node_t *left, pm_node_t *right, const pm_token_t *operator) {
1995 pm_alternation_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_alternation_pattern_node_t);
1996
1997 *node = (pm_alternation_pattern_node_t) {
1998 {
1999 .type = PM_ALTERNATION_PATTERN_NODE,
2000 .node_id = PM_NODE_IDENTIFY(parser),
2001 .location = {
2002 .start = left->location.start,
2003 .end = right->location.end
2004 },
2005 },
2006 .left = left,
2007 .right = right,
2008 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
2009 };
2010
2011 return node;
2012}
2013
2017static pm_and_node_t *
2018pm_and_node_create(pm_parser_t *parser, pm_node_t *left, const pm_token_t *operator, pm_node_t *right) {
2019 pm_assert_value_expression(parser, left);
2020
2021 pm_and_node_t *node = PM_NODE_ALLOC(parser, pm_and_node_t);
2022
2023 *node = (pm_and_node_t) {
2024 {
2025 .type = PM_AND_NODE,
2026 .node_id = PM_NODE_IDENTIFY(parser),
2027 .location = {
2028 .start = left->location.start,
2029 .end = right->location.end
2030 },
2031 },
2032 .left = left,
2033 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
2034 .right = right
2035 };
2036
2037 return node;
2038}
2039
2043static pm_arguments_node_t *
2044pm_arguments_node_create(pm_parser_t *parser) {
2045 pm_arguments_node_t *node = PM_NODE_ALLOC(parser, pm_arguments_node_t);
2046
2047 *node = (pm_arguments_node_t) {
2048 {
2049 .type = PM_ARGUMENTS_NODE,
2050 .node_id = PM_NODE_IDENTIFY(parser),
2051 .location = PM_LOCATION_NULL_VALUE(parser)
2052 },
2053 .arguments = { 0 }
2054 };
2055
2056 return node;
2057}
2058
2062static size_t
2063pm_arguments_node_size(pm_arguments_node_t *node) {
2064 return node->arguments.size;
2065}
2066
2070static void
2071pm_arguments_node_arguments_append(pm_arguments_node_t *node, pm_node_t *argument) {
2072 if (pm_arguments_node_size(node) == 0) {
2073 node->base.location.start = argument->location.start;
2074 }
2075
2076 node->base.location.end = argument->location.end;
2077 pm_node_list_append(&node->arguments, argument);
2078
2079 if (PM_NODE_TYPE_P(argument, PM_SPLAT_NODE)) {
2080 if (PM_NODE_FLAG_P(node, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_SPLAT)) {
2081 pm_node_flag_set((pm_node_t *) node, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_MULTIPLE_SPLATS);
2082 } else {
2083 pm_node_flag_set((pm_node_t *) node, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_SPLAT);
2084 }
2085 }
2086}
2087
2091static pm_array_node_t *
2092pm_array_node_create(pm_parser_t *parser, const pm_token_t *opening) {
2093 pm_array_node_t *node = PM_NODE_ALLOC(parser, pm_array_node_t);
2094
2095 *node = (pm_array_node_t) {
2096 {
2097 .type = PM_ARRAY_NODE,
2098 .flags = PM_NODE_FLAG_STATIC_LITERAL,
2099 .node_id = PM_NODE_IDENTIFY(parser),
2100 .location = PM_LOCATION_TOKEN_VALUE(opening)
2101 },
2102 .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
2103 .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
2104 .elements = { 0 }
2105 };
2106
2107 return node;
2108}
2109
2113static inline void
2114pm_array_node_elements_append(pm_array_node_t *node, pm_node_t *element) {
2115 if (!node->elements.size && !node->opening_loc.start) {
2116 node->base.location.start = element->location.start;
2117 }
2118
2119 pm_node_list_append(&node->elements, element);
2120 node->base.location.end = element->location.end;
2121
2122 // If the element is not a static literal, then the array is not a static
2123 // literal. Turn that flag off.
2124 if (PM_NODE_TYPE_P(element, PM_ARRAY_NODE) || PM_NODE_TYPE_P(element, PM_HASH_NODE) || PM_NODE_TYPE_P(element, PM_RANGE_NODE) || !PM_NODE_FLAG_P(element, PM_NODE_FLAG_STATIC_LITERAL)) {
2125 pm_node_flag_unset((pm_node_t *)node, PM_NODE_FLAG_STATIC_LITERAL);
2126 }
2127
2128 if (PM_NODE_TYPE_P(element, PM_SPLAT_NODE)) {
2129 pm_node_flag_set((pm_node_t *)node, PM_ARRAY_NODE_FLAGS_CONTAINS_SPLAT);
2130 }
2131}
2132
2136static void
2137pm_array_node_close_set(pm_array_node_t *node, const pm_token_t *closing) {
2138 assert(closing->type == PM_TOKEN_BRACKET_RIGHT || closing->type == PM_TOKEN_STRING_END || closing->type == PM_TOKEN_MISSING || closing->type == PM_TOKEN_NOT_PROVIDED);
2139 node->base.location.end = closing->end;
2140 node->closing_loc = PM_LOCATION_TOKEN_VALUE(closing);
2141}
2142
2147static pm_array_pattern_node_t *
2148pm_array_pattern_node_node_list_create(pm_parser_t *parser, pm_node_list_t *nodes) {
2149 pm_array_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_array_pattern_node_t);
2150
2151 *node = (pm_array_pattern_node_t) {
2152 {
2153 .type = PM_ARRAY_PATTERN_NODE,
2154 .node_id = PM_NODE_IDENTIFY(parser),
2155 .location = {
2156 .start = nodes->nodes[0]->location.start,
2157 .end = nodes->nodes[nodes->size - 1]->location.end
2158 },
2159 },
2160 .constant = NULL,
2161 .rest = NULL,
2162 .requireds = { 0 },
2163 .posts = { 0 },
2164 .opening_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
2165 .closing_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
2166 };
2167
2168 // For now we're going to just copy over each pointer manually. This could be
2169 // much more efficient, as we could instead resize the node list.
2170 bool found_rest = false;
2171 pm_node_t *child;
2172
2173 PM_NODE_LIST_FOREACH(nodes, index, child) {
2174 if (!found_rest && (PM_NODE_TYPE_P(child, PM_SPLAT_NODE) || PM_NODE_TYPE_P(child, PM_IMPLICIT_REST_NODE))) {
2175 node->rest = child;
2176 found_rest = true;
2177 } else if (found_rest) {
2178 pm_node_list_append(&node->posts, child);
2179 } else {
2180 pm_node_list_append(&node->requireds, child);
2181 }
2182 }
2183
2184 return node;
2185}
2186
2190static pm_array_pattern_node_t *
2191pm_array_pattern_node_rest_create(pm_parser_t *parser, pm_node_t *rest) {
2192 pm_array_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_array_pattern_node_t);
2193
2194 *node = (pm_array_pattern_node_t) {
2195 {
2196 .type = PM_ARRAY_PATTERN_NODE,
2197 .node_id = PM_NODE_IDENTIFY(parser),
2198 .location = rest->location,
2199 },
2200 .constant = NULL,
2201 .rest = rest,
2202 .requireds = { 0 },
2203 .posts = { 0 },
2204 .opening_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
2205 .closing_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
2206 };
2207
2208 return node;
2209}
2210
2215static pm_array_pattern_node_t *
2216pm_array_pattern_node_constant_create(pm_parser_t *parser, pm_node_t *constant, const pm_token_t *opening, const pm_token_t *closing) {
2217 pm_array_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_array_pattern_node_t);
2218
2219 *node = (pm_array_pattern_node_t) {
2220 {
2221 .type = PM_ARRAY_PATTERN_NODE,
2222 .node_id = PM_NODE_IDENTIFY(parser),
2223 .location = {
2224 .start = constant->location.start,
2225 .end = closing->end
2226 },
2227 },
2228 .constant = constant,
2229 .rest = NULL,
2230 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
2231 .closing_loc = PM_LOCATION_TOKEN_VALUE(closing),
2232 .requireds = { 0 },
2233 .posts = { 0 }
2234 };
2235
2236 return node;
2237}
2238
2243static pm_array_pattern_node_t *
2244pm_array_pattern_node_empty_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *closing) {
2245 pm_array_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_array_pattern_node_t);
2246
2247 *node = (pm_array_pattern_node_t) {
2248 {
2249 .type = PM_ARRAY_PATTERN_NODE,
2250 .node_id = PM_NODE_IDENTIFY(parser),
2251 .location = {
2252 .start = opening->start,
2253 .end = closing->end
2254 },
2255 },
2256 .constant = NULL,
2257 .rest = NULL,
2258 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
2259 .closing_loc = PM_LOCATION_TOKEN_VALUE(closing),
2260 .requireds = { 0 },
2261 .posts = { 0 }
2262 };
2263
2264 return node;
2265}
2266
2267static inline void
2268pm_array_pattern_node_requireds_append(pm_array_pattern_node_t *node, pm_node_t *inner) {
2269 pm_node_list_append(&node->requireds, inner);
2270}
2271
2275static pm_assoc_node_t *
2276pm_assoc_node_create(pm_parser_t *parser, pm_node_t *key, const pm_token_t *operator, pm_node_t *value) {
2277 pm_assoc_node_t *node = PM_NODE_ALLOC(parser, pm_assoc_node_t);
2278 const uint8_t *end;
2279
2280 if (value != NULL && value->location.end > key->location.end) {
2281 end = value->location.end;
2282 } else if (operator->type != PM_TOKEN_NOT_PROVIDED) {
2283 end = operator->end;
2284 } else {
2285 end = key->location.end;
2286 }
2287
2288 // Hash string keys will be frozen, so we can mark them as frozen here so
2289 // that the compiler picks them up and also when we check for static literal
2290 // on the keys it gets factored in.
2291 if (PM_NODE_TYPE_P(key, PM_STRING_NODE)) {
2292 key->flags |= PM_STRING_FLAGS_FROZEN | PM_NODE_FLAG_STATIC_LITERAL;
2293 }
2294
2295 // If the key and value of this assoc node are both static literals, then
2296 // we can mark this node as a static literal.
2297 pm_node_flags_t flags = 0;
2298 if (
2299 !PM_NODE_TYPE_P(key, PM_ARRAY_NODE) && !PM_NODE_TYPE_P(key, PM_HASH_NODE) && !PM_NODE_TYPE_P(key, PM_RANGE_NODE) &&
2300 value && !PM_NODE_TYPE_P(value, PM_ARRAY_NODE) && !PM_NODE_TYPE_P(value, PM_HASH_NODE) && !PM_NODE_TYPE_P(value, PM_RANGE_NODE)
2301 ) {
2302 flags = key->flags & value->flags & PM_NODE_FLAG_STATIC_LITERAL;
2303 }
2304
2305 *node = (pm_assoc_node_t) {
2306 {
2307 .type = PM_ASSOC_NODE,
2308 .flags = flags,
2309 .node_id = PM_NODE_IDENTIFY(parser),
2310 .location = {
2311 .start = key->location.start,
2312 .end = end
2313 },
2314 },
2315 .key = key,
2316 .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator),
2317 .value = value
2318 };
2319
2320 return node;
2321}
2322
2326static pm_assoc_splat_node_t *
2327pm_assoc_splat_node_create(pm_parser_t *parser, pm_node_t *value, const pm_token_t *operator) {
2328 assert(operator->type == PM_TOKEN_USTAR_STAR);
2329 pm_assoc_splat_node_t *node = PM_NODE_ALLOC(parser, pm_assoc_splat_node_t);
2330
2331 *node = (pm_assoc_splat_node_t) {
2332 {
2333 .type = PM_ASSOC_SPLAT_NODE,
2334 .node_id = PM_NODE_IDENTIFY(parser),
2335 .location = {
2336 .start = operator->start,
2337 .end = value == NULL ? operator->end : value->location.end
2338 },
2339 },
2340 .value = value,
2341 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
2342 };
2343
2344 return node;
2345}
2346
2350static pm_back_reference_read_node_t *
2351pm_back_reference_read_node_create(pm_parser_t *parser, const pm_token_t *name) {
2352 assert(name->type == PM_TOKEN_BACK_REFERENCE);
2353 pm_back_reference_read_node_t *node = PM_NODE_ALLOC(parser, pm_back_reference_read_node_t);
2354
2355 *node = (pm_back_reference_read_node_t) {
2356 {
2357 .type = PM_BACK_REFERENCE_READ_NODE,
2358 .node_id = PM_NODE_IDENTIFY(parser),
2359 .location = PM_LOCATION_TOKEN_VALUE(name),
2360 },
2361 .name = pm_parser_constant_id_token(parser, name)
2362 };
2363
2364 return node;
2365}
2366
2370static pm_begin_node_t *
2371pm_begin_node_create(pm_parser_t *parser, const pm_token_t *begin_keyword, pm_statements_node_t *statements) {
2372 pm_begin_node_t *node = PM_NODE_ALLOC(parser, pm_begin_node_t);
2373
2374 *node = (pm_begin_node_t) {
2375 {
2376 .type = PM_BEGIN_NODE,
2377 .node_id = PM_NODE_IDENTIFY(parser),
2378 .location = {
2379 .start = begin_keyword->start,
2380 .end = statements == NULL ? begin_keyword->end : statements->base.location.end
2381 },
2382 },
2383 .begin_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(begin_keyword),
2384 .statements = statements,
2385 .end_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
2386 };
2387
2388 return node;
2389}
2390
2394static void
2395pm_begin_node_rescue_clause_set(pm_begin_node_t *node, pm_rescue_node_t *rescue_clause) {
2396 // If the begin keyword doesn't exist, we set the start on the begin_node
2397 if (!node->begin_keyword_loc.start) {
2398 node->base.location.start = rescue_clause->base.location.start;
2399 }
2400 node->base.location.end = rescue_clause->base.location.end;
2401 node->rescue_clause = rescue_clause;
2402}
2403
2407static void
2408pm_begin_node_else_clause_set(pm_begin_node_t *node, pm_else_node_t *else_clause) {
2409 node->base.location.end = else_clause->base.location.end;
2410 node->else_clause = else_clause;
2411}
2412
2416static void
2417pm_begin_node_ensure_clause_set(pm_begin_node_t *node, pm_ensure_node_t *ensure_clause) {
2418 node->base.location.end = ensure_clause->base.location.end;
2419 node->ensure_clause = ensure_clause;
2420}
2421
2425static void
2426pm_begin_node_end_keyword_set(pm_begin_node_t *node, const pm_token_t *end_keyword) {
2427 assert(end_keyword->type == PM_TOKEN_KEYWORD_END || end_keyword->type == PM_TOKEN_MISSING);
2428
2429 node->base.location.end = end_keyword->end;
2430 node->end_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(end_keyword);
2431}
2432
2436static pm_block_argument_node_t *
2437pm_block_argument_node_create(pm_parser_t *parser, const pm_token_t *operator, pm_node_t *expression) {
2438 pm_block_argument_node_t *node = PM_NODE_ALLOC(parser, pm_block_argument_node_t);
2439
2440 *node = (pm_block_argument_node_t) {
2441 {
2442 .type = PM_BLOCK_ARGUMENT_NODE,
2443 .node_id = PM_NODE_IDENTIFY(parser),
2444 .location = {
2445 .start = operator->start,
2446 .end = expression == NULL ? operator->end : expression->location.end
2447 },
2448 },
2449 .expression = expression,
2450 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
2451 };
2452
2453 return node;
2454}
2455
2459static pm_block_node_t *
2460pm_block_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, const pm_token_t *opening, pm_node_t *parameters, pm_node_t *body, const pm_token_t *closing) {
2461 pm_block_node_t *node = PM_NODE_ALLOC(parser, pm_block_node_t);
2462
2463 *node = (pm_block_node_t) {
2464 {
2465 .type = PM_BLOCK_NODE,
2466 .node_id = PM_NODE_IDENTIFY(parser),
2467 .location = { .start = opening->start, .end = closing->end },
2468 },
2469 .locals = *locals,
2470 .parameters = parameters,
2471 .body = body,
2472 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
2473 .closing_loc = PM_LOCATION_TOKEN_VALUE(closing)
2474 };
2475
2476 return node;
2477}
2478
2482static pm_block_parameter_node_t *
2483pm_block_parameter_node_create(pm_parser_t *parser, const pm_token_t *name, const pm_token_t *operator) {
2484 assert(operator->type == PM_TOKEN_NOT_PROVIDED || operator->type == PM_TOKEN_UAMPERSAND || operator->type == PM_TOKEN_AMPERSAND);
2485 pm_block_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_block_parameter_node_t);
2486
2487 *node = (pm_block_parameter_node_t) {
2488 {
2489 .type = PM_BLOCK_PARAMETER_NODE,
2490 .node_id = PM_NODE_IDENTIFY(parser),
2491 .location = {
2492 .start = operator->start,
2493 .end = (name->type == PM_TOKEN_NOT_PROVIDED ? operator->end : name->end)
2494 },
2495 },
2496 .name = pm_parser_optional_constant_id_token(parser, name),
2497 .name_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(name),
2498 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
2499 };
2500
2501 return node;
2502}
2503
2507static pm_block_parameters_node_t *
2508pm_block_parameters_node_create(pm_parser_t *parser, pm_parameters_node_t *parameters, const pm_token_t *opening) {
2509 pm_block_parameters_node_t *node = PM_NODE_ALLOC(parser, pm_block_parameters_node_t);
2510
2511 const uint8_t *start;
2512 if (opening->type != PM_TOKEN_NOT_PROVIDED) {
2513 start = opening->start;
2514 } else if (parameters != NULL) {
2515 start = parameters->base.location.start;
2516 } else {
2517 start = NULL;
2518 }
2519
2520 const uint8_t *end;
2521 if (parameters != NULL) {
2522 end = parameters->base.location.end;
2523 } else if (opening->type != PM_TOKEN_NOT_PROVIDED) {
2524 end = opening->end;
2525 } else {
2526 end = NULL;
2527 }
2528
2529 *node = (pm_block_parameters_node_t) {
2530 {
2531 .type = PM_BLOCK_PARAMETERS_NODE,
2532 .node_id = PM_NODE_IDENTIFY(parser),
2533 .location = {
2534 .start = start,
2535 .end = end
2536 }
2537 },
2538 .parameters = parameters,
2539 .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
2540 .closing_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
2541 .locals = { 0 }
2542 };
2543
2544 return node;
2545}
2546
2550static void
2551pm_block_parameters_node_closing_set(pm_block_parameters_node_t *node, const pm_token_t *closing) {
2552 assert(closing->type == PM_TOKEN_PIPE || closing->type == PM_TOKEN_PARENTHESIS_RIGHT || closing->type == PM_TOKEN_MISSING);
2553
2554 node->base.location.end = closing->end;
2555 node->closing_loc = PM_LOCATION_TOKEN_VALUE(closing);
2556}
2557
2561static pm_block_local_variable_node_t *
2562pm_block_local_variable_node_create(pm_parser_t *parser, const pm_token_t *name) {
2563 pm_block_local_variable_node_t *node = PM_NODE_ALLOC(parser, pm_block_local_variable_node_t);
2564
2565 *node = (pm_block_local_variable_node_t) {
2566 {
2567 .type = PM_BLOCK_LOCAL_VARIABLE_NODE,
2568 .node_id = PM_NODE_IDENTIFY(parser),
2569 .location = PM_LOCATION_TOKEN_VALUE(name),
2570 },
2571 .name = pm_parser_constant_id_token(parser, name)
2572 };
2573
2574 return node;
2575}
2576
2580static void
2581pm_block_parameters_node_append_local(pm_block_parameters_node_t *node, const pm_block_local_variable_node_t *local) {
2582 pm_node_list_append(&node->locals, (pm_node_t *) local);
2583
2584 if (node->base.location.start == NULL) node->base.location.start = local->base.location.start;
2585 node->base.location.end = local->base.location.end;
2586}
2587
2591static pm_break_node_t *
2592pm_break_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_arguments_node_t *arguments) {
2593 assert(keyword->type == PM_TOKEN_KEYWORD_BREAK);
2594 pm_break_node_t *node = PM_NODE_ALLOC(parser, pm_break_node_t);
2595
2596 *node = (pm_break_node_t) {
2597 {
2598 .type = PM_BREAK_NODE,
2599 .node_id = PM_NODE_IDENTIFY(parser),
2600 .location = {
2601 .start = keyword->start,
2602 .end = (arguments == NULL ? keyword->end : arguments->base.location.end)
2603 },
2604 },
2605 .arguments = arguments,
2606 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword)
2607 };
2608
2609 return node;
2610}
2611
2612// There are certain flags that we want to use internally but don't want to
2613// expose because they are not relevant beyond parsing. Therefore we'll define
2614// them here and not define them in config.yml/a header file.
2615static const pm_node_flags_t PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY = 0x4;
2616static const pm_node_flags_t PM_CALL_NODE_FLAGS_IMPLICIT_ARRAY = 0x40;
2617static const pm_node_flags_t PM_CALL_NODE_FLAGS_COMPARISON = 0x80;
2618static const pm_node_flags_t PM_CALL_NODE_FLAGS_INDEX = 0x100;
2619
2625static pm_call_node_t *
2626pm_call_node_create(pm_parser_t *parser, pm_node_flags_t flags) {
2627 pm_call_node_t *node = PM_NODE_ALLOC(parser, pm_call_node_t);
2628
2629 *node = (pm_call_node_t) {
2630 {
2631 .type = PM_CALL_NODE,
2632 .flags = flags,
2633 .node_id = PM_NODE_IDENTIFY(parser),
2634 .location = PM_LOCATION_NULL_VALUE(parser),
2635 },
2636 .receiver = NULL,
2637 .call_operator_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
2638 .message_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
2639 .opening_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
2640 .arguments = NULL,
2641 .closing_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
2642 .block = NULL,
2643 .name = 0
2644 };
2645
2646 return node;
2647}
2648
2653static inline pm_node_flags_t
2654pm_call_node_ignore_visibility_flag(const pm_node_t *receiver) {
2655 return PM_NODE_TYPE_P(receiver, PM_SELF_NODE) ? PM_CALL_NODE_FLAGS_IGNORE_VISIBILITY : 0;
2656}
2657
2662static pm_call_node_t *
2663pm_call_node_aref_create(pm_parser_t *parser, pm_node_t *receiver, pm_arguments_t *arguments) {
2664 pm_assert_value_expression(parser, receiver);
2665
2666 pm_node_flags_t flags = pm_call_node_ignore_visibility_flag(receiver);
2667 if (arguments->block == NULL || PM_NODE_TYPE_P(arguments->block, PM_BLOCK_ARGUMENT_NODE)) {
2668 flags |= PM_CALL_NODE_FLAGS_INDEX;
2669 }
2670
2671 pm_call_node_t *node = pm_call_node_create(parser, flags);
2672
2673 node->base.location.start = receiver->location.start;
2674 node->base.location.end = pm_arguments_end(arguments);
2675
2676 node->receiver = receiver;
2677 node->message_loc.start = arguments->opening_loc.start;
2678 node->message_loc.end = arguments->closing_loc.end;
2679
2680 node->opening_loc = arguments->opening_loc;
2681 node->arguments = arguments->arguments;
2682 node->closing_loc = arguments->closing_loc;
2683 node->block = arguments->block;
2684
2685 node->name = pm_parser_constant_id_constant(parser, "[]", 2);
2686 return node;
2687}
2688
2692static pm_call_node_t *
2693pm_call_node_binary_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *operator, pm_node_t *argument, pm_node_flags_t flags) {
2694 pm_assert_value_expression(parser, receiver);
2695 pm_assert_value_expression(parser, argument);
2696
2697 pm_call_node_t *node = pm_call_node_create(parser, pm_call_node_ignore_visibility_flag(receiver) | flags);
2698
2699 node->base.location.start = MIN(receiver->location.start, argument->location.start);
2700 node->base.location.end = MAX(receiver->location.end, argument->location.end);
2701
2702 node->receiver = receiver;
2703 node->message_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator);
2704
2705 pm_arguments_node_t *arguments = pm_arguments_node_create(parser);
2706 pm_arguments_node_arguments_append(arguments, argument);
2707 node->arguments = arguments;
2708
2709 node->name = pm_parser_constant_id_token(parser, operator);
2710 return node;
2711}
2712
2716static pm_call_node_t *
2717pm_call_node_call_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *operator, pm_token_t *message, pm_arguments_t *arguments) {
2718 pm_assert_value_expression(parser, receiver);
2719
2720 pm_call_node_t *node = pm_call_node_create(parser, pm_call_node_ignore_visibility_flag(receiver));
2721
2722 node->base.location.start = receiver->location.start;
2723 const uint8_t *end = pm_arguments_end(arguments);
2724 if (end == NULL) {
2725 end = message->end;
2726 }
2727 node->base.location.end = end;
2728
2729 node->receiver = receiver;
2730 node->call_operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator);
2731 node->message_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(message);
2732 node->opening_loc = arguments->opening_loc;
2733 node->arguments = arguments->arguments;
2734 node->closing_loc = arguments->closing_loc;
2735 node->block = arguments->block;
2736
2737 if (operator->type == PM_TOKEN_AMPERSAND_DOT) {
2738 pm_node_flag_set((pm_node_t *)node, PM_CALL_NODE_FLAGS_SAFE_NAVIGATION);
2739 }
2740
2741 node->name = pm_parser_constant_id_token(parser, message);
2742 return node;
2743}
2744
2748static pm_call_node_t *
2749pm_call_node_call_synthesized_create(pm_parser_t *parser, pm_node_t *receiver, const char *message, pm_arguments_node_t *arguments) {
2750 pm_call_node_t *node = pm_call_node_create(parser, 0);
2751 node->base.location.start = parser->start;
2752 node->base.location.end = parser->end;
2753
2754 node->receiver = receiver;
2755 node->call_operator_loc = (pm_location_t) { .start = NULL, .end = NULL };
2756 node->message_loc = (pm_location_t) { .start = NULL, .end = NULL };
2757 node->arguments = arguments;
2758
2759 node->name = pm_parser_constant_id_constant(parser, message, strlen(message));
2760 return node;
2761}
2762
2767static pm_call_node_t *
2768pm_call_node_fcall_create(pm_parser_t *parser, pm_token_t *message, pm_arguments_t *arguments) {
2769 pm_call_node_t *node = pm_call_node_create(parser, PM_CALL_NODE_FLAGS_IGNORE_VISIBILITY);
2770
2771 node->base.location.start = message->start;
2772 node->base.location.end = pm_arguments_end(arguments);
2773
2774 node->message_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(message);
2775 node->opening_loc = arguments->opening_loc;
2776 node->arguments = arguments->arguments;
2777 node->closing_loc = arguments->closing_loc;
2778 node->block = arguments->block;
2779
2780 node->name = pm_parser_constant_id_token(parser, message);
2781 return node;
2782}
2783
2788static pm_call_node_t *
2789pm_call_node_fcall_synthesized_create(pm_parser_t *parser, pm_arguments_node_t *arguments, pm_constant_id_t name) {
2790 pm_call_node_t *node = pm_call_node_create(parser, PM_CALL_NODE_FLAGS_IGNORE_VISIBILITY);
2791
2792 node->base.location = PM_LOCATION_NULL_VALUE(parser);
2793 node->arguments = arguments;
2794
2795 node->name = name;
2796 return node;
2797}
2798
2802static pm_call_node_t *
2803pm_call_node_not_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *message, pm_arguments_t *arguments) {
2804 pm_assert_value_expression(parser, receiver);
2805 if (receiver != NULL) pm_conditional_predicate(parser, receiver, PM_CONDITIONAL_PREDICATE_TYPE_NOT);
2806
2807 pm_call_node_t *node = pm_call_node_create(parser, receiver == NULL ? 0 : pm_call_node_ignore_visibility_flag(receiver));
2808
2809 node->base.location.start = message->start;
2810 if (arguments->closing_loc.start != NULL) {
2811 node->base.location.end = arguments->closing_loc.end;
2812 } else {
2813 assert(receiver != NULL);
2814 node->base.location.end = receiver->location.end;
2815 }
2816
2817 node->receiver = receiver;
2818 node->message_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(message);
2819 node->opening_loc = arguments->opening_loc;
2820 node->arguments = arguments->arguments;
2821 node->closing_loc = arguments->closing_loc;
2822
2823 node->name = pm_parser_constant_id_constant(parser, "!", 1);
2824 return node;
2825}
2826
2830static pm_call_node_t *
2831pm_call_node_shorthand_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *operator, pm_arguments_t *arguments) {
2832 pm_assert_value_expression(parser, receiver);
2833
2834 pm_call_node_t *node = pm_call_node_create(parser, pm_call_node_ignore_visibility_flag(receiver));
2835
2836 node->base.location.start = receiver->location.start;
2837 node->base.location.end = pm_arguments_end(arguments);
2838
2839 node->receiver = receiver;
2840 node->call_operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator);
2841 node->opening_loc = arguments->opening_loc;
2842 node->arguments = arguments->arguments;
2843 node->closing_loc = arguments->closing_loc;
2844 node->block = arguments->block;
2845
2846 if (operator->type == PM_TOKEN_AMPERSAND_DOT) {
2847 pm_node_flag_set((pm_node_t *)node, PM_CALL_NODE_FLAGS_SAFE_NAVIGATION);
2848 }
2849
2850 node->name = pm_parser_constant_id_constant(parser, "call", 4);
2851 return node;
2852}
2853
2857static pm_call_node_t *
2858pm_call_node_unary_create(pm_parser_t *parser, pm_token_t *operator, pm_node_t *receiver, const char *name) {
2859 pm_assert_value_expression(parser, receiver);
2860
2861 pm_call_node_t *node = pm_call_node_create(parser, pm_call_node_ignore_visibility_flag(receiver));
2862
2863 node->base.location.start = operator->start;
2864 node->base.location.end = receiver->location.end;
2865
2866 node->receiver = receiver;
2867 node->message_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator);
2868
2869 node->name = pm_parser_constant_id_constant(parser, name, strlen(name));
2870 return node;
2871}
2872
2877static pm_call_node_t *
2878pm_call_node_variable_call_create(pm_parser_t *parser, pm_token_t *message) {
2879 pm_call_node_t *node = pm_call_node_create(parser, PM_CALL_NODE_FLAGS_IGNORE_VISIBILITY);
2880
2881 node->base.location = PM_LOCATION_TOKEN_VALUE(message);
2882 node->message_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(message);
2883
2884 node->name = pm_parser_constant_id_token(parser, message);
2885 return node;
2886}
2887
2892static inline bool
2893pm_call_node_writable_p(const pm_parser_t *parser, const pm_call_node_t *node) {
2894 return (
2895 (node->message_loc.start != NULL) &&
2896 (node->message_loc.end[-1] != '!') &&
2897 (node->message_loc.end[-1] != '?') &&
2898 char_is_identifier_start(parser, node->message_loc.start) &&
2899 (node->opening_loc.start == NULL) &&
2900 (node->arguments == NULL) &&
2901 (node->block == NULL)
2902 );
2903}
2904
2908static void
2909pm_call_write_read_name_init(pm_parser_t *parser, pm_constant_id_t *read_name, pm_constant_id_t *write_name) {
2910 pm_constant_t *write_constant = pm_constant_pool_id_to_constant(&parser->constant_pool, *write_name);
2911
2912 if (write_constant->length > 0) {
2913 size_t length = write_constant->length - 1;
2914
2915 void *memory = xmalloc(length);
2916 memcpy(memory, write_constant->start, length);
2917
2918 *read_name = pm_constant_pool_insert_owned(&parser->constant_pool, (uint8_t *) memory, length);
2919 } else {
2920 // We can get here if the message was missing because of a syntax error.
2921 *read_name = pm_parser_constant_id_constant(parser, "", 0);
2922 }
2923}
2924
2928static pm_call_and_write_node_t *
2929pm_call_and_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
2930 assert(target->block == NULL);
2931 assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
2932 pm_call_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_call_and_write_node_t);
2933
2934 *node = (pm_call_and_write_node_t) {
2935 {
2936 .type = PM_CALL_AND_WRITE_NODE,
2937 .flags = target->base.flags,
2938 .node_id = PM_NODE_IDENTIFY(parser),
2939 .location = {
2940 .start = target->base.location.start,
2941 .end = value->location.end
2942 }
2943 },
2944 .receiver = target->receiver,
2945 .call_operator_loc = target->call_operator_loc,
2946 .message_loc = target->message_loc,
2947 .read_name = 0,
2948 .write_name = target->name,
2949 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
2950 .value = value
2951 };
2952
2953 pm_call_write_read_name_init(parser, &node->read_name, &node->write_name);
2954
2955 // Here we're going to free the target, since it is no longer necessary.
2956 // However, we don't want to call `pm_node_destroy` because we want to keep
2957 // around all of its children since we just reused them.
2958 xfree(target);
2959
2960 return node;
2961}
2962
2967static void
2968pm_index_arguments_check(pm_parser_t *parser, const pm_arguments_node_t *arguments, const pm_node_t *block) {
2969 if (parser->version != PM_OPTIONS_VERSION_CRUBY_3_3) {
2970 if (arguments != NULL && PM_NODE_FLAG_P(arguments, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS)) {
2971 pm_node_t *node;
2972 PM_NODE_LIST_FOREACH(&arguments->arguments, index, node) {
2973 if (PM_NODE_TYPE_P(node, PM_KEYWORD_HASH_NODE)) {
2974 pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_INDEX_KEYWORDS);
2975 break;
2976 }
2977 }
2978 }
2979
2980 if (block != NULL) {
2981 pm_parser_err_node(parser, block, PM_ERR_UNEXPECTED_INDEX_BLOCK);
2982 }
2983 }
2984}
2985
2989static pm_index_and_write_node_t *
2990pm_index_and_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
2991 assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
2992 pm_index_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_index_and_write_node_t);
2993
2994 pm_index_arguments_check(parser, target->arguments, target->block);
2995
2996 assert(!target->block || PM_NODE_TYPE_P(target->block, PM_BLOCK_ARGUMENT_NODE));
2997 *node = (pm_index_and_write_node_t) {
2998 {
2999 .type = PM_INDEX_AND_WRITE_NODE,
3000 .flags = target->base.flags,
3001 .node_id = PM_NODE_IDENTIFY(parser),
3002 .location = {
3003 .start = target->base.location.start,
3004 .end = value->location.end
3005 }
3006 },
3007 .receiver = target->receiver,
3008 .call_operator_loc = target->call_operator_loc,
3009 .opening_loc = target->opening_loc,
3010 .arguments = target->arguments,
3011 .closing_loc = target->closing_loc,
3012 .block = (pm_block_argument_node_t *) target->block,
3013 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3014 .value = value
3015 };
3016
3017 // Here we're going to free the target, since it is no longer necessary.
3018 // However, we don't want to call `pm_node_destroy` because we want to keep
3019 // around all of its children since we just reused them.
3020 xfree(target);
3021
3022 return node;
3023}
3024
3028static pm_call_operator_write_node_t *
3029pm_call_operator_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3030 assert(target->block == NULL);
3031 pm_call_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_call_operator_write_node_t);
3032
3033 *node = (pm_call_operator_write_node_t) {
3034 {
3035 .type = PM_CALL_OPERATOR_WRITE_NODE,
3036 .flags = target->base.flags,
3037 .node_id = PM_NODE_IDENTIFY(parser),
3038 .location = {
3039 .start = target->base.location.start,
3040 .end = value->location.end
3041 }
3042 },
3043 .receiver = target->receiver,
3044 .call_operator_loc = target->call_operator_loc,
3045 .message_loc = target->message_loc,
3046 .read_name = 0,
3047 .write_name = target->name,
3048 .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1),
3049 .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3050 .value = value
3051 };
3052
3053 pm_call_write_read_name_init(parser, &node->read_name, &node->write_name);
3054
3055 // Here we're going to free the target, since it is no longer necessary.
3056 // However, we don't want to call `pm_node_destroy` because we want to keep
3057 // around all of its children since we just reused them.
3058 xfree(target);
3059
3060 return node;
3061}
3062
3066static pm_index_operator_write_node_t *
3067pm_index_operator_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3068 pm_index_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_index_operator_write_node_t);
3069
3070 pm_index_arguments_check(parser, target->arguments, target->block);
3071
3072 assert(!target->block || PM_NODE_TYPE_P(target->block, PM_BLOCK_ARGUMENT_NODE));
3073 *node = (pm_index_operator_write_node_t) {
3074 {
3075 .type = PM_INDEX_OPERATOR_WRITE_NODE,
3076 .flags = target->base.flags,
3077 .node_id = PM_NODE_IDENTIFY(parser),
3078 .location = {
3079 .start = target->base.location.start,
3080 .end = value->location.end
3081 }
3082 },
3083 .receiver = target->receiver,
3084 .call_operator_loc = target->call_operator_loc,
3085 .opening_loc = target->opening_loc,
3086 .arguments = target->arguments,
3087 .closing_loc = target->closing_loc,
3088 .block = (pm_block_argument_node_t *) target->block,
3089 .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1),
3090 .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3091 .value = value
3092 };
3093
3094 // Here we're going to free the target, since it is no longer necessary.
3095 // However, we don't want to call `pm_node_destroy` because we want to keep
3096 // around all of its children since we just reused them.
3097 xfree(target);
3098
3099 return node;
3100}
3101
3105static pm_call_or_write_node_t *
3106pm_call_or_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3107 assert(target->block == NULL);
3108 assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
3109 pm_call_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_call_or_write_node_t);
3110
3111 *node = (pm_call_or_write_node_t) {
3112 {
3113 .type = PM_CALL_OR_WRITE_NODE,
3114 .flags = target->base.flags,
3115 .node_id = PM_NODE_IDENTIFY(parser),
3116 .location = {
3117 .start = target->base.location.start,
3118 .end = value->location.end
3119 }
3120 },
3121 .receiver = target->receiver,
3122 .call_operator_loc = target->call_operator_loc,
3123 .message_loc = target->message_loc,
3124 .read_name = 0,
3125 .write_name = target->name,
3126 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3127 .value = value
3128 };
3129
3130 pm_call_write_read_name_init(parser, &node->read_name, &node->write_name);
3131
3132 // Here we're going to free the target, since it is no longer necessary.
3133 // However, we don't want to call `pm_node_destroy` because we want to keep
3134 // around all of its children since we just reused them.
3135 xfree(target);
3136
3137 return node;
3138}
3139
3143static pm_index_or_write_node_t *
3144pm_index_or_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3145 assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
3146 pm_index_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_index_or_write_node_t);
3147
3148 pm_index_arguments_check(parser, target->arguments, target->block);
3149
3150 assert(!target->block || PM_NODE_TYPE_P(target->block, PM_BLOCK_ARGUMENT_NODE));
3151 *node = (pm_index_or_write_node_t) {
3152 {
3153 .type = PM_INDEX_OR_WRITE_NODE,
3154 .flags = target->base.flags,
3155 .node_id = PM_NODE_IDENTIFY(parser),
3156 .location = {
3157 .start = target->base.location.start,
3158 .end = value->location.end
3159 }
3160 },
3161 .receiver = target->receiver,
3162 .call_operator_loc = target->call_operator_loc,
3163 .opening_loc = target->opening_loc,
3164 .arguments = target->arguments,
3165 .closing_loc = target->closing_loc,
3166 .block = (pm_block_argument_node_t *) target->block,
3167 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3168 .value = value
3169 };
3170
3171 // Here we're going to free the target, since it is no longer necessary.
3172 // However, we don't want to call `pm_node_destroy` because we want to keep
3173 // around all of its children since we just reused them.
3174 xfree(target);
3175
3176 return node;
3177}
3178
3183static pm_call_target_node_t *
3184pm_call_target_node_create(pm_parser_t *parser, pm_call_node_t *target) {
3185 pm_call_target_node_t *node = PM_NODE_ALLOC(parser, pm_call_target_node_t);
3186
3187 *node = (pm_call_target_node_t) {
3188 {
3189 .type = PM_CALL_TARGET_NODE,
3190 .flags = target->base.flags,
3191 .node_id = PM_NODE_IDENTIFY(parser),
3192 .location = target->base.location
3193 },
3194 .receiver = target->receiver,
3195 .call_operator_loc = target->call_operator_loc,
3196 .name = target->name,
3197 .message_loc = target->message_loc
3198 };
3199
3200 // Here we're going to free the target, since it is no longer necessary.
3201 // However, we don't want to call `pm_node_destroy` because we want to keep
3202 // around all of its children since we just reused them.
3203 xfree(target);
3204
3205 return node;
3206}
3207
3212static pm_index_target_node_t *
3213pm_index_target_node_create(pm_parser_t *parser, pm_call_node_t *target) {
3214 pm_index_target_node_t *node = PM_NODE_ALLOC(parser, pm_index_target_node_t);
3215 pm_node_flags_t flags = target->base.flags;
3216
3217 pm_index_arguments_check(parser, target->arguments, target->block);
3218
3219 assert(!target->block || PM_NODE_TYPE_P(target->block, PM_BLOCK_ARGUMENT_NODE));
3220 *node = (pm_index_target_node_t) {
3221 {
3222 .type = PM_INDEX_TARGET_NODE,
3223 .flags = flags | PM_CALL_NODE_FLAGS_ATTRIBUTE_WRITE,
3224 .node_id = PM_NODE_IDENTIFY(parser),
3225 .location = target->base.location
3226 },
3227 .receiver = target->receiver,
3228 .opening_loc = target->opening_loc,
3229 .arguments = target->arguments,
3230 .closing_loc = target->closing_loc,
3231 .block = (pm_block_argument_node_t *) target->block,
3232 };
3233
3234 // Here we're going to free the target, since it is no longer necessary.
3235 // However, we don't want to call `pm_node_destroy` because we want to keep
3236 // around all of its children since we just reused them.
3237 xfree(target);
3238
3239 return node;
3240}
3241
3245static pm_capture_pattern_node_t *
3246pm_capture_pattern_node_create(pm_parser_t *parser, pm_node_t *value, pm_local_variable_target_node_t *target, const pm_token_t *operator) {
3247 pm_capture_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_capture_pattern_node_t);
3248
3249 *node = (pm_capture_pattern_node_t) {
3250 {
3251 .type = PM_CAPTURE_PATTERN_NODE,
3252 .node_id = PM_NODE_IDENTIFY(parser),
3253 .location = {
3254 .start = value->location.start,
3255 .end = target->base.location.end
3256 },
3257 },
3258 .value = value,
3259 .target = target,
3260 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
3261 };
3262
3263 return node;
3264}
3265
3269static pm_case_node_t *
3270pm_case_node_create(pm_parser_t *parser, const pm_token_t *case_keyword, pm_node_t *predicate, const pm_token_t *end_keyword) {
3271 pm_case_node_t *node = PM_NODE_ALLOC(parser, pm_case_node_t);
3272
3273 *node = (pm_case_node_t) {
3274 {
3275 .type = PM_CASE_NODE,
3276 .node_id = PM_NODE_IDENTIFY(parser),
3277 .location = {
3278 .start = case_keyword->start,
3279 .end = end_keyword->end
3280 },
3281 },
3282 .predicate = predicate,
3283 .else_clause = NULL,
3284 .case_keyword_loc = PM_LOCATION_TOKEN_VALUE(case_keyword),
3285 .end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword),
3286 .conditions = { 0 }
3287 };
3288
3289 return node;
3290}
3291
3295static void
3296pm_case_node_condition_append(pm_case_node_t *node, pm_node_t *condition) {
3297 assert(PM_NODE_TYPE_P(condition, PM_WHEN_NODE));
3298
3299 pm_node_list_append(&node->conditions, condition);
3300 node->base.location.end = condition->location.end;
3301}
3302
3306static void
3307pm_case_node_else_clause_set(pm_case_node_t *node, pm_else_node_t *else_clause) {
3308 node->else_clause = else_clause;
3309 node->base.location.end = else_clause->base.location.end;
3310}
3311
3315static void
3316pm_case_node_end_keyword_loc_set(pm_case_node_t *node, const pm_token_t *end_keyword) {
3317 node->base.location.end = end_keyword->end;
3318 node->end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword);
3319}
3320
3324static pm_case_match_node_t *
3325pm_case_match_node_create(pm_parser_t *parser, const pm_token_t *case_keyword, pm_node_t *predicate, const pm_token_t *end_keyword) {
3326 pm_case_match_node_t *node = PM_NODE_ALLOC(parser, pm_case_match_node_t);
3327
3328 *node = (pm_case_match_node_t) {
3329 {
3330 .type = PM_CASE_MATCH_NODE,
3331 .node_id = PM_NODE_IDENTIFY(parser),
3332 .location = {
3333 .start = case_keyword->start,
3334 .end = end_keyword->end
3335 },
3336 },
3337 .predicate = predicate,
3338 .else_clause = NULL,
3339 .case_keyword_loc = PM_LOCATION_TOKEN_VALUE(case_keyword),
3340 .end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword),
3341 .conditions = { 0 }
3342 };
3343
3344 return node;
3345}
3346
3350static void
3351pm_case_match_node_condition_append(pm_case_match_node_t *node, pm_node_t *condition) {
3352 assert(PM_NODE_TYPE_P(condition, PM_IN_NODE));
3353
3354 pm_node_list_append(&node->conditions, condition);
3355 node->base.location.end = condition->location.end;
3356}
3357
3361static void
3362pm_case_match_node_else_clause_set(pm_case_match_node_t *node, pm_else_node_t *else_clause) {
3363 node->else_clause = else_clause;
3364 node->base.location.end = else_clause->base.location.end;
3365}
3366
3370static void
3371pm_case_match_node_end_keyword_loc_set(pm_case_match_node_t *node, const pm_token_t *end_keyword) {
3372 node->base.location.end = end_keyword->end;
3373 node->end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword);
3374}
3375
3379static pm_class_node_t *
3380pm_class_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, const pm_token_t *class_keyword, pm_node_t *constant_path, const pm_token_t *name, const pm_token_t *inheritance_operator, pm_node_t *superclass, pm_node_t *body, const pm_token_t *end_keyword) {
3381 pm_class_node_t *node = PM_NODE_ALLOC(parser, pm_class_node_t);
3382
3383 *node = (pm_class_node_t) {
3384 {
3385 .type = PM_CLASS_NODE,
3386 .node_id = PM_NODE_IDENTIFY(parser),
3387 .location = { .start = class_keyword->start, .end = end_keyword->end },
3388 },
3389 .locals = *locals,
3390 .class_keyword_loc = PM_LOCATION_TOKEN_VALUE(class_keyword),
3391 .constant_path = constant_path,
3392 .inheritance_operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(inheritance_operator),
3393 .superclass = superclass,
3394 .body = body,
3395 .end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword),
3396 .name = pm_parser_constant_id_token(parser, name)
3397 };
3398
3399 return node;
3400}
3401
3405static pm_class_variable_and_write_node_t *
3406pm_class_variable_and_write_node_create(pm_parser_t *parser, pm_class_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3407 assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
3408 pm_class_variable_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_class_variable_and_write_node_t);
3409
3410 *node = (pm_class_variable_and_write_node_t) {
3411 {
3412 .type = PM_CLASS_VARIABLE_AND_WRITE_NODE,
3413 .node_id = PM_NODE_IDENTIFY(parser),
3414 .location = {
3415 .start = target->base.location.start,
3416 .end = value->location.end
3417 }
3418 },
3419 .name = target->name,
3420 .name_loc = target->base.location,
3421 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3422 .value = value
3423 };
3424
3425 return node;
3426}
3427
3431static pm_class_variable_operator_write_node_t *
3432pm_class_variable_operator_write_node_create(pm_parser_t *parser, pm_class_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3433 pm_class_variable_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_class_variable_operator_write_node_t);
3434
3435 *node = (pm_class_variable_operator_write_node_t) {
3436 {
3437 .type = PM_CLASS_VARIABLE_OPERATOR_WRITE_NODE,
3438 .node_id = PM_NODE_IDENTIFY(parser),
3439 .location = {
3440 .start = target->base.location.start,
3441 .end = value->location.end
3442 }
3443 },
3444 .name = target->name,
3445 .name_loc = target->base.location,
3446 .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3447 .value = value,
3448 .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
3449 };
3450
3451 return node;
3452}
3453
3457static pm_class_variable_or_write_node_t *
3458pm_class_variable_or_write_node_create(pm_parser_t *parser, pm_class_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3459 assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
3460 pm_class_variable_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_class_variable_or_write_node_t);
3461
3462 *node = (pm_class_variable_or_write_node_t) {
3463 {
3464 .type = PM_CLASS_VARIABLE_OR_WRITE_NODE,
3465 .node_id = PM_NODE_IDENTIFY(parser),
3466 .location = {
3467 .start = target->base.location.start,
3468 .end = value->location.end
3469 }
3470 },
3471 .name = target->name,
3472 .name_loc = target->base.location,
3473 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3474 .value = value
3475 };
3476
3477 return node;
3478}
3479
3483static pm_class_variable_read_node_t *
3484pm_class_variable_read_node_create(pm_parser_t *parser, const pm_token_t *token) {
3485 assert(token->type == PM_TOKEN_CLASS_VARIABLE);
3486 pm_class_variable_read_node_t *node = PM_NODE_ALLOC(parser, pm_class_variable_read_node_t);
3487
3488 *node = (pm_class_variable_read_node_t) {
3489 {
3490 .type = PM_CLASS_VARIABLE_READ_NODE,
3491 .node_id = PM_NODE_IDENTIFY(parser),
3492 .location = PM_LOCATION_TOKEN_VALUE(token)
3493 },
3494 .name = pm_parser_constant_id_token(parser, token)
3495 };
3496
3497 return node;
3498}
3499
3506static inline pm_node_flags_t
3507pm_implicit_array_write_flags(const pm_node_t *node, pm_node_flags_t flags) {
3508 if (PM_NODE_TYPE_P(node, PM_ARRAY_NODE) && ((const pm_array_node_t *) node)->opening_loc.start == NULL) {
3509 return flags;
3510 }
3511 return 0;
3512}
3513
3517static pm_class_variable_write_node_t *
3518pm_class_variable_write_node_create(pm_parser_t *parser, pm_class_variable_read_node_t *read_node, pm_token_t *operator, pm_node_t *value) {
3519 pm_class_variable_write_node_t *node = PM_NODE_ALLOC(parser, pm_class_variable_write_node_t);
3520
3521 *node = (pm_class_variable_write_node_t) {
3522 {
3523 .type = PM_CLASS_VARIABLE_WRITE_NODE,
3524 .flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY),
3525 .node_id = PM_NODE_IDENTIFY(parser),
3526 .location = {
3527 .start = read_node->base.location.start,
3528 .end = value->location.end
3529 },
3530 },
3531 .name = read_node->name,
3532 .name_loc = PM_LOCATION_NODE_VALUE((pm_node_t *) read_node),
3533 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3534 .value = value
3535 };
3536
3537 return node;
3538}
3539
3543static pm_constant_path_and_write_node_t *
3544pm_constant_path_and_write_node_create(pm_parser_t *parser, pm_constant_path_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3545 assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
3546 pm_constant_path_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_path_and_write_node_t);
3547
3548 *node = (pm_constant_path_and_write_node_t) {
3549 {
3550 .type = PM_CONSTANT_PATH_AND_WRITE_NODE,
3551 .node_id = PM_NODE_IDENTIFY(parser),
3552 .location = {
3553 .start = target->base.location.start,
3554 .end = value->location.end
3555 }
3556 },
3557 .target = target,
3558 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3559 .value = value
3560 };
3561
3562 return node;
3563}
3564
3568static pm_constant_path_operator_write_node_t *
3569pm_constant_path_operator_write_node_create(pm_parser_t *parser, pm_constant_path_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3570 pm_constant_path_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_path_operator_write_node_t);
3571
3572 *node = (pm_constant_path_operator_write_node_t) {
3573 {
3574 .type = PM_CONSTANT_PATH_OPERATOR_WRITE_NODE,
3575 .node_id = PM_NODE_IDENTIFY(parser),
3576 .location = {
3577 .start = target->base.location.start,
3578 .end = value->location.end
3579 }
3580 },
3581 .target = target,
3582 .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3583 .value = value,
3584 .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
3585 };
3586
3587 return node;
3588}
3589
3593static pm_constant_path_or_write_node_t *
3594pm_constant_path_or_write_node_create(pm_parser_t *parser, pm_constant_path_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3595 assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
3596 pm_constant_path_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_path_or_write_node_t);
3597
3598 *node = (pm_constant_path_or_write_node_t) {
3599 {
3600 .type = PM_CONSTANT_PATH_OR_WRITE_NODE,
3601 .node_id = PM_NODE_IDENTIFY(parser),
3602 .location = {
3603 .start = target->base.location.start,
3604 .end = value->location.end
3605 }
3606 },
3607 .target = target,
3608 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3609 .value = value
3610 };
3611
3612 return node;
3613}
3614
3618static pm_constant_path_node_t *
3619pm_constant_path_node_create(pm_parser_t *parser, pm_node_t *parent, const pm_token_t *delimiter, const pm_token_t *name_token) {
3620 pm_assert_value_expression(parser, parent);
3621 pm_constant_path_node_t *node = PM_NODE_ALLOC(parser, pm_constant_path_node_t);
3622
3623 pm_constant_id_t name = PM_CONSTANT_ID_UNSET;
3624 if (name_token->type == PM_TOKEN_CONSTANT) {
3625 name = pm_parser_constant_id_token(parser, name_token);
3626 }
3627
3628 *node = (pm_constant_path_node_t) {
3629 {
3630 .type = PM_CONSTANT_PATH_NODE,
3631 .node_id = PM_NODE_IDENTIFY(parser),
3632 .location = {
3633 .start = parent == NULL ? delimiter->start : parent->location.start,
3634 .end = name_token->end
3635 },
3636 },
3637 .parent = parent,
3638 .name = name,
3639 .delimiter_loc = PM_LOCATION_TOKEN_VALUE(delimiter),
3640 .name_loc = PM_LOCATION_TOKEN_VALUE(name_token)
3641 };
3642
3643 return node;
3644}
3645
3649static pm_constant_path_write_node_t *
3650pm_constant_path_write_node_create(pm_parser_t *parser, pm_constant_path_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3651 pm_constant_path_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_path_write_node_t);
3652
3653 *node = (pm_constant_path_write_node_t) {
3654 {
3655 .type = PM_CONSTANT_PATH_WRITE_NODE,
3656 .flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY),
3657 .node_id = PM_NODE_IDENTIFY(parser),
3658 .location = {
3659 .start = target->base.location.start,
3660 .end = value->location.end
3661 },
3662 },
3663 .target = target,
3664 .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator),
3665 .value = value
3666 };
3667
3668 return node;
3669}
3670
3674static pm_constant_and_write_node_t *
3675pm_constant_and_write_node_create(pm_parser_t *parser, pm_constant_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3676 assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
3677 pm_constant_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_and_write_node_t);
3678
3679 *node = (pm_constant_and_write_node_t) {
3680 {
3681 .type = PM_CONSTANT_AND_WRITE_NODE,
3682 .node_id = PM_NODE_IDENTIFY(parser),
3683 .location = {
3684 .start = target->base.location.start,
3685 .end = value->location.end
3686 }
3687 },
3688 .name = target->name,
3689 .name_loc = target->base.location,
3690 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3691 .value = value
3692 };
3693
3694 return node;
3695}
3696
3700static pm_constant_operator_write_node_t *
3701pm_constant_operator_write_node_create(pm_parser_t *parser, pm_constant_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3702 pm_constant_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_operator_write_node_t);
3703
3704 *node = (pm_constant_operator_write_node_t) {
3705 {
3706 .type = PM_CONSTANT_OPERATOR_WRITE_NODE,
3707 .node_id = PM_NODE_IDENTIFY(parser),
3708 .location = {
3709 .start = target->base.location.start,
3710 .end = value->location.end
3711 }
3712 },
3713 .name = target->name,
3714 .name_loc = target->base.location,
3715 .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3716 .value = value,
3717 .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
3718 };
3719
3720 return node;
3721}
3722
3726static pm_constant_or_write_node_t *
3727pm_constant_or_write_node_create(pm_parser_t *parser, pm_constant_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3728 assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
3729 pm_constant_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_or_write_node_t);
3730
3731 *node = (pm_constant_or_write_node_t) {
3732 {
3733 .type = PM_CONSTANT_OR_WRITE_NODE,
3734 .node_id = PM_NODE_IDENTIFY(parser),
3735 .location = {
3736 .start = target->base.location.start,
3737 .end = value->location.end
3738 }
3739 },
3740 .name = target->name,
3741 .name_loc = target->base.location,
3742 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3743 .value = value
3744 };
3745
3746 return node;
3747}
3748
3752static pm_constant_read_node_t *
3753pm_constant_read_node_create(pm_parser_t *parser, const pm_token_t *name) {
3754 assert(name->type == PM_TOKEN_CONSTANT || name->type == PM_TOKEN_MISSING);
3755 pm_constant_read_node_t *node = PM_NODE_ALLOC(parser, pm_constant_read_node_t);
3756
3757 *node = (pm_constant_read_node_t) {
3758 {
3759 .type = PM_CONSTANT_READ_NODE,
3760 .node_id = PM_NODE_IDENTIFY(parser),
3761 .location = PM_LOCATION_TOKEN_VALUE(name)
3762 },
3763 .name = pm_parser_constant_id_token(parser, name)
3764 };
3765
3766 return node;
3767}
3768
3772static pm_constant_write_node_t *
3773pm_constant_write_node_create(pm_parser_t *parser, pm_constant_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3774 pm_constant_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_write_node_t);
3775
3776 *node = (pm_constant_write_node_t) {
3777 {
3778 .type = PM_CONSTANT_WRITE_NODE,
3779 .flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY),
3780 .node_id = PM_NODE_IDENTIFY(parser),
3781 .location = {
3782 .start = target->base.location.start,
3783 .end = value->location.end
3784 }
3785 },
3786 .name = target->name,
3787 .name_loc = target->base.location,
3788 .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator),
3789 .value = value
3790 };
3791
3792 return node;
3793}
3794
3798static void
3799pm_def_node_receiver_check(pm_parser_t *parser, const pm_node_t *node) {
3800 switch (PM_NODE_TYPE(node)) {
3801 case PM_BEGIN_NODE: {
3802 const pm_begin_node_t *cast = (pm_begin_node_t *) node;
3803 if (cast->statements != NULL) pm_def_node_receiver_check(parser, (pm_node_t *) cast->statements);
3804 break;
3805 }
3806 case PM_PARENTHESES_NODE: {
3807 const pm_parentheses_node_t *cast = (const pm_parentheses_node_t *) node;
3808 if (cast->body != NULL) pm_def_node_receiver_check(parser, cast->body);
3809 break;
3810 }
3811 case PM_STATEMENTS_NODE: {
3812 const pm_statements_node_t *cast = (const pm_statements_node_t *) node;
3813 pm_def_node_receiver_check(parser, cast->body.nodes[cast->body.size - 1]);
3814 break;
3815 }
3816 case PM_ARRAY_NODE:
3817 case PM_FLOAT_NODE:
3818 case PM_IMAGINARY_NODE:
3819 case PM_INTEGER_NODE:
3820 case PM_INTERPOLATED_REGULAR_EXPRESSION_NODE:
3821 case PM_INTERPOLATED_STRING_NODE:
3822 case PM_INTERPOLATED_SYMBOL_NODE:
3823 case PM_INTERPOLATED_X_STRING_NODE:
3824 case PM_RATIONAL_NODE:
3825 case PM_REGULAR_EXPRESSION_NODE:
3826 case PM_SOURCE_ENCODING_NODE:
3827 case PM_SOURCE_FILE_NODE:
3828 case PM_SOURCE_LINE_NODE:
3829 case PM_STRING_NODE:
3830 case PM_SYMBOL_NODE:
3831 case PM_X_STRING_NODE:
3832 pm_parser_err_node(parser, node, PM_ERR_SINGLETON_FOR_LITERALS);
3833 break;
3834 default:
3835 break;
3836 }
3837}
3838
3842static pm_def_node_t *
3843pm_def_node_create(
3844 pm_parser_t *parser,
3845 pm_constant_id_t name,
3846 const pm_token_t *name_loc,
3847 pm_node_t *receiver,
3848 pm_parameters_node_t *parameters,
3849 pm_node_t *body,
3850 pm_constant_id_list_t *locals,
3851 const pm_token_t *def_keyword,
3852 const pm_token_t *operator,
3853 const pm_token_t *lparen,
3854 const pm_token_t *rparen,
3855 const pm_token_t *equal,
3856 const pm_token_t *end_keyword
3857) {
3858 pm_def_node_t *node = PM_NODE_ALLOC(parser, pm_def_node_t);
3859 const uint8_t *end;
3860
3861 if (end_keyword->type == PM_TOKEN_NOT_PROVIDED) {
3862 end = body->location.end;
3863 } else {
3864 end = end_keyword->end;
3865 }
3866
3867 if ((receiver != NULL) && PM_NODE_TYPE_P(receiver, PM_PARENTHESES_NODE)) {
3868 pm_def_node_receiver_check(parser, receiver);
3869 }
3870
3871 *node = (pm_def_node_t) {
3872 {
3873 .type = PM_DEF_NODE,
3874 .node_id = PM_NODE_IDENTIFY(parser),
3875 .location = { .start = def_keyword->start, .end = end },
3876 },
3877 .name = name,
3878 .name_loc = PM_LOCATION_TOKEN_VALUE(name_loc),
3879 .receiver = receiver,
3880 .parameters = parameters,
3881 .body = body,
3882 .locals = *locals,
3883 .def_keyword_loc = PM_LOCATION_TOKEN_VALUE(def_keyword),
3884 .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator),
3885 .lparen_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(lparen),
3886 .rparen_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(rparen),
3887 .equal_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(equal),
3888 .end_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(end_keyword)
3889 };
3890
3891 return node;
3892}
3893
3897static pm_defined_node_t *
3898pm_defined_node_create(pm_parser_t *parser, const pm_token_t *lparen, pm_node_t *value, const pm_token_t *rparen, const pm_location_t *keyword_loc) {
3899 pm_defined_node_t *node = PM_NODE_ALLOC(parser, pm_defined_node_t);
3900
3901 *node = (pm_defined_node_t) {
3902 {
3903 .type = PM_DEFINED_NODE,
3904 .node_id = PM_NODE_IDENTIFY(parser),
3905 .location = {
3906 .start = keyword_loc->start,
3907 .end = (rparen->type == PM_TOKEN_NOT_PROVIDED ? value->location.end : rparen->end)
3908 },
3909 },
3910 .lparen_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(lparen),
3911 .value = value,
3912 .rparen_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(rparen),
3913 .keyword_loc = *keyword_loc
3914 };
3915
3916 return node;
3917}
3918
3922static pm_else_node_t *
3923pm_else_node_create(pm_parser_t *parser, const pm_token_t *else_keyword, pm_statements_node_t *statements, const pm_token_t *end_keyword) {
3924 pm_else_node_t *node = PM_NODE_ALLOC(parser, pm_else_node_t);
3925 const uint8_t *end = NULL;
3926 if ((end_keyword->type == PM_TOKEN_NOT_PROVIDED) && (statements != NULL)) {
3927 end = statements->base.location.end;
3928 } else {
3929 end = end_keyword->end;
3930 }
3931
3932 *node = (pm_else_node_t) {
3933 {
3934 .type = PM_ELSE_NODE,
3935 .node_id = PM_NODE_IDENTIFY(parser),
3936 .location = {
3937 .start = else_keyword->start,
3938 .end = end,
3939 },
3940 },
3941 .else_keyword_loc = PM_LOCATION_TOKEN_VALUE(else_keyword),
3942 .statements = statements,
3943 .end_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(end_keyword)
3944 };
3945
3946 return node;
3947}
3948
3952static pm_embedded_statements_node_t *
3953pm_embedded_statements_node_create(pm_parser_t *parser, const pm_token_t *opening, pm_statements_node_t *statements, const pm_token_t *closing) {
3954 pm_embedded_statements_node_t *node = PM_NODE_ALLOC(parser, pm_embedded_statements_node_t);
3955
3956 *node = (pm_embedded_statements_node_t) {
3957 {
3958 .type = PM_EMBEDDED_STATEMENTS_NODE,
3959 .node_id = PM_NODE_IDENTIFY(parser),
3960 .location = {
3961 .start = opening->start,
3962 .end = closing->end
3963 }
3964 },
3965 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
3966 .statements = statements,
3967 .closing_loc = PM_LOCATION_TOKEN_VALUE(closing)
3968 };
3969
3970 return node;
3971}
3972
3976static pm_embedded_variable_node_t *
3977pm_embedded_variable_node_create(pm_parser_t *parser, const pm_token_t *operator, pm_node_t *variable) {
3978 pm_embedded_variable_node_t *node = PM_NODE_ALLOC(parser, pm_embedded_variable_node_t);
3979
3980 *node = (pm_embedded_variable_node_t) {
3981 {
3982 .type = PM_EMBEDDED_VARIABLE_NODE,
3983 .node_id = PM_NODE_IDENTIFY(parser),
3984 .location = {
3985 .start = operator->start,
3986 .end = variable->location.end
3987 }
3988 },
3989 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3990 .variable = variable
3991 };
3992
3993 return node;
3994}
3995
3999static pm_ensure_node_t *
4000pm_ensure_node_create(pm_parser_t *parser, const pm_token_t *ensure_keyword, pm_statements_node_t *statements, const pm_token_t *end_keyword) {
4001 pm_ensure_node_t *node = PM_NODE_ALLOC(parser, pm_ensure_node_t);
4002
4003 *node = (pm_ensure_node_t) {
4004 {
4005 .type = PM_ENSURE_NODE,
4006 .node_id = PM_NODE_IDENTIFY(parser),
4007 .location = {
4008 .start = ensure_keyword->start,
4009 .end = end_keyword->end
4010 },
4011 },
4012 .ensure_keyword_loc = PM_LOCATION_TOKEN_VALUE(ensure_keyword),
4013 .statements = statements,
4014 .end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword)
4015 };
4016
4017 return node;
4018}
4019
4023static pm_false_node_t *
4024pm_false_node_create(pm_parser_t *parser, const pm_token_t *token) {
4025 assert(token->type == PM_TOKEN_KEYWORD_FALSE);
4026 pm_false_node_t *node = PM_NODE_ALLOC(parser, pm_false_node_t);
4027
4028 *node = (pm_false_node_t) {{
4029 .type = PM_FALSE_NODE,
4030 .flags = PM_NODE_FLAG_STATIC_LITERAL,
4031 .node_id = PM_NODE_IDENTIFY(parser),
4032 .location = PM_LOCATION_TOKEN_VALUE(token)
4033 }};
4034
4035 return node;
4036}
4037
4042static pm_find_pattern_node_t *
4043pm_find_pattern_node_create(pm_parser_t *parser, pm_node_list_t *nodes) {
4044 pm_find_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_find_pattern_node_t);
4045
4046 pm_node_t *left = nodes->nodes[0];
4047 assert(PM_NODE_TYPE_P(left, PM_SPLAT_NODE));
4048 pm_splat_node_t *left_splat_node = (pm_splat_node_t *) left;
4049
4050 pm_node_t *right;
4051
4052 if (nodes->size == 1) {
4053 right = (pm_node_t *) pm_missing_node_create(parser, left->location.end, left->location.end);
4054 } else {
4055 right = nodes->nodes[nodes->size - 1];
4056 assert(PM_NODE_TYPE_P(right, PM_SPLAT_NODE));
4057 }
4058
4059#if PRISM_SERIALIZE_ONLY_SEMANTICS_FIELDS
4060 // FindPatternNode#right is typed as SplatNode in this case, so replace the potential MissingNode with a SplatNode.
4061 // The resulting AST will anyway be ignored, but this file still needs to compile.
4062 pm_splat_node_t *right_splat_node = PM_NODE_TYPE_P(right, PM_SPLAT_NODE) ? (pm_splat_node_t *) right : left_splat_node;
4063#else
4064 pm_node_t *right_splat_node = right;
4065#endif
4066 *node = (pm_find_pattern_node_t) {
4067 {
4068 .type = PM_FIND_PATTERN_NODE,
4069 .node_id = PM_NODE_IDENTIFY(parser),
4070 .location = {
4071 .start = left->location.start,
4072 .end = right->location.end,
4073 },
4074 },
4075 .constant = NULL,
4076 .left = left_splat_node,
4077 .right = right_splat_node,
4078 .requireds = { 0 },
4079 .opening_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
4080 .closing_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
4081 };
4082
4083 // For now we're going to just copy over each pointer manually. This could be
4084 // much more efficient, as we could instead resize the node list to only point
4085 // to 1...-1.
4086 for (size_t index = 1; index < nodes->size - 1; index++) {
4087 pm_node_list_append(&node->requireds, nodes->nodes[index]);
4088 }
4089
4090 return node;
4091}
4092
4097static double
4098pm_double_parse(pm_parser_t *parser, const pm_token_t *token) {
4099 ptrdiff_t diff = token->end - token->start;
4100 if (diff <= 0) return 0.0;
4101
4102 // First, get a buffer of the content.
4103 size_t length = (size_t) diff;
4104 char *buffer = xmalloc(sizeof(char) * (length + 1));
4105 memcpy((void *) buffer, token->start, length);
4106
4107 // Next, determine if we need to replace the decimal point because of
4108 // locale-specific options, and then normalize them if we have to.
4109 char decimal_point = *localeconv()->decimal_point;
4110 if (decimal_point != '.') {
4111 for (size_t index = 0; index < length; index++) {
4112 if (buffer[index] == '.') buffer[index] = decimal_point;
4113 }
4114 }
4115
4116 // Next, handle underscores by removing them from the buffer.
4117 for (size_t index = 0; index < length; index++) {
4118 if (buffer[index] == '_') {
4119 memmove((void *) (buffer + index), (void *) (buffer + index + 1), length - index);
4120 length--;
4121 }
4122 }
4123
4124 // Null-terminate the buffer so that strtod cannot read off the end.
4125 buffer[length] = '\0';
4126
4127 // Now, call strtod to parse the value. Note that CRuby has their own
4128 // version of strtod which avoids locales. We're okay using the locale-aware
4129 // version because we've already validated through the parser that the token
4130 // is in a valid format.
4131 errno = 0;
4132 char *eptr;
4133 double value = strtod(buffer, &eptr);
4134
4135 // This should never happen, because we've already checked that the token
4136 // is in a valid format. However it's good to be safe.
4137 if ((eptr != buffer + length) || (errno != 0 && errno != ERANGE)) {
4138 PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, (*token), PM_ERR_FLOAT_PARSE);
4139 xfree((void *) buffer);
4140 return 0.0;
4141 }
4142
4143 // If errno is set, then it should only be ERANGE. At this point we need to
4144 // check if it's infinity (it should be).
4145 if (errno == ERANGE && PRISM_ISINF(value)) {
4146 int warn_width;
4147 const char *ellipsis;
4148
4149 if (length > 20) {
4150 warn_width = 20;
4151 ellipsis = "...";
4152 } else {
4153 warn_width = (int) length;
4154 ellipsis = "";
4155 }
4156
4157 pm_diagnostic_list_append_format(&parser->warning_list, token->start, token->end, PM_WARN_FLOAT_OUT_OF_RANGE, warn_width, (const char *) token->start, ellipsis);
4158 value = (value < 0.0) ? -HUGE_VAL : HUGE_VAL;
4159 }
4160
4161 // Finally we can free the buffer and return the value.
4162 xfree((void *) buffer);
4163 return value;
4164}
4165
4169static pm_float_node_t *
4170pm_float_node_create(pm_parser_t *parser, const pm_token_t *token) {
4171 assert(token->type == PM_TOKEN_FLOAT);
4172 pm_float_node_t *node = PM_NODE_ALLOC(parser, pm_float_node_t);
4173
4174 *node = (pm_float_node_t) {
4175 {
4176 .type = PM_FLOAT_NODE,
4177 .flags = PM_NODE_FLAG_STATIC_LITERAL,
4178 .node_id = PM_NODE_IDENTIFY(parser),
4179 .location = PM_LOCATION_TOKEN_VALUE(token)
4180 },
4181 .value = pm_double_parse(parser, token)
4182 };
4183
4184 return node;
4185}
4186
4190static pm_imaginary_node_t *
4191pm_float_node_imaginary_create(pm_parser_t *parser, const pm_token_t *token) {
4192 assert(token->type == PM_TOKEN_FLOAT_IMAGINARY);
4193
4194 pm_imaginary_node_t *node = PM_NODE_ALLOC(parser, pm_imaginary_node_t);
4195 *node = (pm_imaginary_node_t) {
4196 {
4197 .type = PM_IMAGINARY_NODE,
4198 .flags = PM_NODE_FLAG_STATIC_LITERAL,
4199 .node_id = PM_NODE_IDENTIFY(parser),
4200 .location = PM_LOCATION_TOKEN_VALUE(token)
4201 },
4202 .numeric = (pm_node_t *) pm_float_node_create(parser, &((pm_token_t) {
4203 .type = PM_TOKEN_FLOAT,
4204 .start = token->start,
4205 .end = token->end - 1
4206 }))
4207 };
4208
4209 return node;
4210}
4211
4215static pm_rational_node_t *
4216pm_float_node_rational_create(pm_parser_t *parser, const pm_token_t *token) {
4217 assert(token->type == PM_TOKEN_FLOAT_RATIONAL);
4218
4219 pm_rational_node_t *node = PM_NODE_ALLOC(parser, pm_rational_node_t);
4220 *node = (pm_rational_node_t) {
4221 {
4222 .type = PM_RATIONAL_NODE,
4223 .flags = PM_INTEGER_BASE_FLAGS_DECIMAL | PM_NODE_FLAG_STATIC_LITERAL,
4224 .node_id = PM_NODE_IDENTIFY(parser),
4225 .location = PM_LOCATION_TOKEN_VALUE(token)
4226 },
4227 .numerator = { 0 },
4228 .denominator = { 0 }
4229 };
4230
4231 const uint8_t *start = token->start;
4232 const uint8_t *end = token->end - 1; // r
4233
4234 while (start < end && *start == '0') start++; // 0.1 -> .1
4235 while (end > start && end[-1] == '0') end--; // 1.0 -> 1.
4236
4237 size_t length = (size_t) (end - start);
4238 if (length == 1) {
4239 node->denominator.value = 1;
4240 return node;
4241 }
4242
4243 const uint8_t *point = memchr(start, '.', length);
4244 assert(point && "should have a decimal point");
4245
4246 uint8_t *digits = malloc(length);
4247 if (digits == NULL) {
4248 fputs("[pm_float_node_rational_create] Failed to allocate memory", stderr);
4249 abort();
4250 }
4251
4252 memcpy(digits, start, (unsigned long) (point - start));
4253 memcpy(digits + (point - start), point + 1, (unsigned long) (end - point - 1));
4254 pm_integer_parse(&node->numerator, PM_INTEGER_BASE_DEFAULT, digits, digits + length - 1);
4255
4256 digits[0] = '1';
4257 if (end - point > 1) memset(digits + 1, '0', (size_t) (end - point - 1));
4258 pm_integer_parse(&node->denominator, PM_INTEGER_BASE_DEFAULT, digits, digits + (end - point));
4259 free(digits);
4260
4261 pm_integers_reduce(&node->numerator, &node->denominator);
4262 return node;
4263}
4264
4269static pm_imaginary_node_t *
4270pm_float_node_rational_imaginary_create(pm_parser_t *parser, const pm_token_t *token) {
4271 assert(token->type == PM_TOKEN_FLOAT_RATIONAL_IMAGINARY);
4272
4273 pm_imaginary_node_t *node = PM_NODE_ALLOC(parser, pm_imaginary_node_t);
4274 *node = (pm_imaginary_node_t) {
4275 {
4276 .type = PM_IMAGINARY_NODE,
4277 .flags = PM_NODE_FLAG_STATIC_LITERAL,
4278 .node_id = PM_NODE_IDENTIFY(parser),
4279 .location = PM_LOCATION_TOKEN_VALUE(token)
4280 },
4281 .numeric = (pm_node_t *) pm_float_node_rational_create(parser, &((pm_token_t) {
4282 .type = PM_TOKEN_FLOAT_RATIONAL,
4283 .start = token->start,
4284 .end = token->end - 1
4285 }))
4286 };
4287
4288 return node;
4289}
4290
4294static pm_for_node_t *
4295pm_for_node_create(
4296 pm_parser_t *parser,
4297 pm_node_t *index,
4298 pm_node_t *collection,
4299 pm_statements_node_t *statements,
4300 const pm_token_t *for_keyword,
4301 const pm_token_t *in_keyword,
4302 const pm_token_t *do_keyword,
4303 const pm_token_t *end_keyword
4304) {
4305 pm_for_node_t *node = PM_NODE_ALLOC(parser, pm_for_node_t);
4306
4307 *node = (pm_for_node_t) {
4308 {
4309 .type = PM_FOR_NODE,
4310 .node_id = PM_NODE_IDENTIFY(parser),
4311 .location = {
4312 .start = for_keyword->start,
4313 .end = end_keyword->end
4314 },
4315 },
4316 .index = index,
4317 .collection = collection,
4318 .statements = statements,
4319 .for_keyword_loc = PM_LOCATION_TOKEN_VALUE(for_keyword),
4320 .in_keyword_loc = PM_LOCATION_TOKEN_VALUE(in_keyword),
4321 .do_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(do_keyword),
4322 .end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword)
4323 };
4324
4325 return node;
4326}
4327
4331static pm_forwarding_arguments_node_t *
4332pm_forwarding_arguments_node_create(pm_parser_t *parser, const pm_token_t *token) {
4333 assert(token->type == PM_TOKEN_UDOT_DOT_DOT);
4334 pm_forwarding_arguments_node_t *node = PM_NODE_ALLOC(parser, pm_forwarding_arguments_node_t);
4335
4336 *node = (pm_forwarding_arguments_node_t) {{
4337 .type = PM_FORWARDING_ARGUMENTS_NODE,
4338 .node_id = PM_NODE_IDENTIFY(parser),
4339 .location = PM_LOCATION_TOKEN_VALUE(token)
4340 }};
4341
4342 return node;
4343}
4344
4348static pm_forwarding_parameter_node_t *
4349pm_forwarding_parameter_node_create(pm_parser_t *parser, const pm_token_t *token) {
4350 assert(token->type == PM_TOKEN_UDOT_DOT_DOT);
4351 pm_forwarding_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_forwarding_parameter_node_t);
4352
4353 *node = (pm_forwarding_parameter_node_t) {{
4354 .type = PM_FORWARDING_PARAMETER_NODE,
4355 .node_id = PM_NODE_IDENTIFY(parser),
4356 .location = PM_LOCATION_TOKEN_VALUE(token)
4357 }};
4358
4359 return node;
4360}
4361
4365static pm_forwarding_super_node_t *
4366pm_forwarding_super_node_create(pm_parser_t *parser, const pm_token_t *token, pm_arguments_t *arguments) {
4367 assert(arguments->block == NULL || PM_NODE_TYPE_P(arguments->block, PM_BLOCK_NODE));
4368 assert(token->type == PM_TOKEN_KEYWORD_SUPER);
4369 pm_forwarding_super_node_t *node = PM_NODE_ALLOC(parser, pm_forwarding_super_node_t);
4370
4371 pm_block_node_t *block = NULL;
4372 if (arguments->block != NULL) {
4373 block = (pm_block_node_t *) arguments->block;
4374 }
4375
4376 *node = (pm_forwarding_super_node_t) {
4377 {
4378 .type = PM_FORWARDING_SUPER_NODE,
4379 .node_id = PM_NODE_IDENTIFY(parser),
4380 .location = {
4381 .start = token->start,
4382 .end = block != NULL ? block->base.location.end : token->end
4383 },
4384 },
4385 .block = block
4386 };
4387
4388 return node;
4389}
4390
4395static pm_hash_pattern_node_t *
4396pm_hash_pattern_node_empty_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *closing) {
4397 pm_hash_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_hash_pattern_node_t);
4398
4399 *node = (pm_hash_pattern_node_t) {
4400 {
4401 .type = PM_HASH_PATTERN_NODE,
4402 .node_id = PM_NODE_IDENTIFY(parser),
4403 .location = {
4404 .start = opening->start,
4405 .end = closing->end
4406 },
4407 },
4408 .constant = NULL,
4409 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
4410 .closing_loc = PM_LOCATION_TOKEN_VALUE(closing),
4411 .elements = { 0 },
4412 .rest = NULL
4413 };
4414
4415 return node;
4416}
4417
4421static pm_hash_pattern_node_t *
4422pm_hash_pattern_node_node_list_create(pm_parser_t *parser, pm_node_list_t *elements, pm_node_t *rest) {
4423 pm_hash_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_hash_pattern_node_t);
4424
4425 const uint8_t *start;
4426 const uint8_t *end;
4427
4428 if (elements->size > 0) {
4429 if (rest) {
4430 start = elements->nodes[0]->location.start;
4431 end = rest->location.end;
4432 } else {
4433 start = elements->nodes[0]->location.start;
4434 end = elements->nodes[elements->size - 1]->location.end;
4435 }
4436 } else {
4437 assert(rest != NULL);
4438 start = rest->location.start;
4439 end = rest->location.end;
4440 }
4441
4442 *node = (pm_hash_pattern_node_t) {
4443 {
4444 .type = PM_HASH_PATTERN_NODE,
4445 .node_id = PM_NODE_IDENTIFY(parser),
4446 .location = {
4447 .start = start,
4448 .end = end
4449 },
4450 },
4451 .constant = NULL,
4452 .elements = { 0 },
4453 .rest = rest,
4454 .opening_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
4455 .closing_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
4456 };
4457
4458 pm_node_t *element;
4459 PM_NODE_LIST_FOREACH(elements, index, element) {
4460 pm_node_list_append(&node->elements, element);
4461 }
4462
4463 return node;
4464}
4465
4469static pm_constant_id_t
4470pm_global_variable_write_name(pm_parser_t *parser, const pm_node_t *target) {
4471 switch (PM_NODE_TYPE(target)) {
4472 case PM_GLOBAL_VARIABLE_READ_NODE:
4473 return ((pm_global_variable_read_node_t *) target)->name;
4474 case PM_BACK_REFERENCE_READ_NODE:
4475 return ((pm_back_reference_read_node_t *) target)->name;
4476 case PM_NUMBERED_REFERENCE_READ_NODE:
4477 // This will only ever happen in the event of a syntax error, but we
4478 // still need to provide something for the node.
4479 return pm_parser_constant_id_location(parser, target->location.start, target->location.end);
4480 default:
4481 assert(false && "unreachable");
4482 return (pm_constant_id_t) -1;
4483 }
4484}
4485
4489static pm_global_variable_and_write_node_t *
4490pm_global_variable_and_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value) {
4491 assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
4492 pm_global_variable_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_and_write_node_t);
4493
4494 *node = (pm_global_variable_and_write_node_t) {
4495 {
4496 .type = PM_GLOBAL_VARIABLE_AND_WRITE_NODE,
4497 .node_id = PM_NODE_IDENTIFY(parser),
4498 .location = {
4499 .start = target->location.start,
4500 .end = value->location.end
4501 }
4502 },
4503 .name = pm_global_variable_write_name(parser, target),
4504 .name_loc = target->location,
4505 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
4506 .value = value
4507 };
4508
4509 return node;
4510}
4511
4515static pm_global_variable_operator_write_node_t *
4516pm_global_variable_operator_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value) {
4517 pm_global_variable_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_operator_write_node_t);
4518
4519 *node = (pm_global_variable_operator_write_node_t) {
4520 {
4521 .type = PM_GLOBAL_VARIABLE_OPERATOR_WRITE_NODE,
4522 .node_id = PM_NODE_IDENTIFY(parser),
4523 .location = {
4524 .start = target->location.start,
4525 .end = value->location.end
4526 }
4527 },
4528 .name = pm_global_variable_write_name(parser, target),
4529 .name_loc = target->location,
4530 .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
4531 .value = value,
4532 .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
4533 };
4534
4535 return node;
4536}
4537
4541static pm_global_variable_or_write_node_t *
4542pm_global_variable_or_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value) {
4543 assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
4544 pm_global_variable_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_or_write_node_t);
4545
4546 *node = (pm_global_variable_or_write_node_t) {
4547 {
4548 .type = PM_GLOBAL_VARIABLE_OR_WRITE_NODE,
4549 .node_id = PM_NODE_IDENTIFY(parser),
4550 .location = {
4551 .start = target->location.start,
4552 .end = value->location.end
4553 }
4554 },
4555 .name = pm_global_variable_write_name(parser, target),
4556 .name_loc = target->location,
4557 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
4558 .value = value
4559 };
4560
4561 return node;
4562}
4563
4567static pm_global_variable_read_node_t *
4568pm_global_variable_read_node_create(pm_parser_t *parser, const pm_token_t *name) {
4569 pm_global_variable_read_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_read_node_t);
4570
4571 *node = (pm_global_variable_read_node_t) {
4572 {
4573 .type = PM_GLOBAL_VARIABLE_READ_NODE,
4574 .node_id = PM_NODE_IDENTIFY(parser),
4575 .location = PM_LOCATION_TOKEN_VALUE(name),
4576 },
4577 .name = pm_parser_constant_id_token(parser, name)
4578 };
4579
4580 return node;
4581}
4582
4586static pm_global_variable_read_node_t *
4587pm_global_variable_read_node_synthesized_create(pm_parser_t *parser, pm_constant_id_t name) {
4588 pm_global_variable_read_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_read_node_t);
4589
4590 *node = (pm_global_variable_read_node_t) {
4591 {
4592 .type = PM_GLOBAL_VARIABLE_READ_NODE,
4593 .node_id = PM_NODE_IDENTIFY(parser),
4594 .location = PM_LOCATION_NULL_VALUE(parser)
4595 },
4596 .name = name
4597 };
4598
4599 return node;
4600}
4601
4605static pm_global_variable_write_node_t *
4606pm_global_variable_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value) {
4607 pm_global_variable_write_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_write_node_t);
4608
4609 *node = (pm_global_variable_write_node_t) {
4610 {
4611 .type = PM_GLOBAL_VARIABLE_WRITE_NODE,
4612 .node_id = PM_NODE_IDENTIFY(parser),
4613 .flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY),
4614 .location = {
4615 .start = target->location.start,
4616 .end = value->location.end
4617 },
4618 },
4619 .name = pm_global_variable_write_name(parser, target),
4620 .name_loc = PM_LOCATION_NODE_VALUE(target),
4621 .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator),
4622 .value = value
4623 };
4624
4625 return node;
4626}
4627
4631static pm_global_variable_write_node_t *
4632pm_global_variable_write_node_synthesized_create(pm_parser_t *parser, pm_constant_id_t name, pm_node_t *value) {
4633 pm_global_variable_write_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_write_node_t);
4634
4635 *node = (pm_global_variable_write_node_t) {
4636 {
4637 .type = PM_GLOBAL_VARIABLE_WRITE_NODE,
4638 .node_id = PM_NODE_IDENTIFY(parser),
4639 .location = PM_LOCATION_NULL_VALUE(parser)
4640 },
4641 .name = name,
4642 .name_loc = PM_LOCATION_NULL_VALUE(parser),
4643 .operator_loc = PM_LOCATION_NULL_VALUE(parser),
4644 .value = value
4645 };
4646
4647 return node;
4648}
4649
4653static pm_hash_node_t *
4654pm_hash_node_create(pm_parser_t *parser, const pm_token_t *opening) {
4655 assert(opening != NULL);
4656 pm_hash_node_t *node = PM_NODE_ALLOC(parser, pm_hash_node_t);
4657
4658 *node = (pm_hash_node_t) {
4659 {
4660 .type = PM_HASH_NODE,
4661 .flags = PM_NODE_FLAG_STATIC_LITERAL,
4662 .node_id = PM_NODE_IDENTIFY(parser),
4663 .location = PM_LOCATION_TOKEN_VALUE(opening)
4664 },
4665 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
4666 .closing_loc = PM_LOCATION_NULL_VALUE(parser),
4667 .elements = { 0 }
4668 };
4669
4670 return node;
4671}
4672
4676static inline void
4677pm_hash_node_elements_append(pm_hash_node_t *hash, pm_node_t *element) {
4678 pm_node_list_append(&hash->elements, element);
4679
4680 bool static_literal = PM_NODE_TYPE_P(element, PM_ASSOC_NODE);
4681 if (static_literal) {
4682 pm_assoc_node_t *assoc = (pm_assoc_node_t *) element;
4683 static_literal = !PM_NODE_TYPE_P(assoc->key, PM_ARRAY_NODE) && !PM_NODE_TYPE_P(assoc->key, PM_HASH_NODE) && !PM_NODE_TYPE_P(assoc->key, PM_RANGE_NODE);
4684 static_literal = static_literal && PM_NODE_FLAG_P(assoc->key, PM_NODE_FLAG_STATIC_LITERAL);
4685 static_literal = static_literal && PM_NODE_FLAG_P(assoc, PM_NODE_FLAG_STATIC_LITERAL);
4686 }
4687
4688 if (!static_literal) {
4689 pm_node_flag_unset((pm_node_t *)hash, PM_NODE_FLAG_STATIC_LITERAL);
4690 }
4691}
4692
4693static inline void
4694pm_hash_node_closing_loc_set(pm_hash_node_t *hash, pm_token_t *token) {
4695 hash->base.location.end = token->end;
4696 hash->closing_loc = PM_LOCATION_TOKEN_VALUE(token);
4697}
4698
4702static pm_if_node_t *
4703pm_if_node_create(pm_parser_t *parser,
4704 const pm_token_t *if_keyword,
4705 pm_node_t *predicate,
4706 const pm_token_t *then_keyword,
4707 pm_statements_node_t *statements,
4708 pm_node_t *subsequent,
4709 const pm_token_t *end_keyword
4710) {
4711 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
4712 pm_if_node_t *node = PM_NODE_ALLOC(parser, pm_if_node_t);
4713
4714 const uint8_t *end;
4715 if (end_keyword->type != PM_TOKEN_NOT_PROVIDED) {
4716 end = end_keyword->end;
4717 } else if (subsequent != NULL) {
4718 end = subsequent->location.end;
4719 } else if (pm_statements_node_body_length(statements) != 0) {
4720 end = statements->base.location.end;
4721 } else {
4722 end = predicate->location.end;
4723 }
4724
4725 *node = (pm_if_node_t) {
4726 {
4727 .type = PM_IF_NODE,
4728 .flags = PM_NODE_FLAG_NEWLINE,
4729 .node_id = PM_NODE_IDENTIFY(parser),
4730 .location = {
4731 .start = if_keyword->start,
4732 .end = end
4733 },
4734 },
4735 .if_keyword_loc = PM_LOCATION_TOKEN_VALUE(if_keyword),
4736 .predicate = predicate,
4737 .then_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(then_keyword),
4738 .statements = statements,
4739 .subsequent = subsequent,
4740 .end_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(end_keyword)
4741 };
4742
4743 return node;
4744}
4745
4749static pm_if_node_t *
4750pm_if_node_modifier_create(pm_parser_t *parser, pm_node_t *statement, const pm_token_t *if_keyword, pm_node_t *predicate) {
4751 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
4752 pm_if_node_t *node = PM_NODE_ALLOC(parser, pm_if_node_t);
4753
4754 pm_statements_node_t *statements = pm_statements_node_create(parser);
4755 pm_statements_node_body_append(parser, statements, statement, true);
4756
4757 *node = (pm_if_node_t) {
4758 {
4759 .type = PM_IF_NODE,
4760 .flags = PM_NODE_FLAG_NEWLINE,
4761 .node_id = PM_NODE_IDENTIFY(parser),
4762 .location = {
4763 .start = statement->location.start,
4764 .end = predicate->location.end
4765 },
4766 },
4767 .if_keyword_loc = PM_LOCATION_TOKEN_VALUE(if_keyword),
4768 .predicate = predicate,
4769 .then_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
4770 .statements = statements,
4771 .subsequent = NULL,
4772 .end_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
4773 };
4774
4775 return node;
4776}
4777
4781static pm_if_node_t *
4782pm_if_node_ternary_create(pm_parser_t *parser, pm_node_t *predicate, const pm_token_t *qmark, pm_node_t *true_expression, const pm_token_t *colon, pm_node_t *false_expression) {
4783 pm_assert_value_expression(parser, predicate);
4784 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
4785
4786 pm_statements_node_t *if_statements = pm_statements_node_create(parser);
4787 pm_statements_node_body_append(parser, if_statements, true_expression, true);
4788
4789 pm_statements_node_t *else_statements = pm_statements_node_create(parser);
4790 pm_statements_node_body_append(parser, else_statements, false_expression, true);
4791
4792 pm_token_t end_keyword = not_provided(parser);
4793 pm_else_node_t *else_node = pm_else_node_create(parser, colon, else_statements, &end_keyword);
4794
4795 pm_if_node_t *node = PM_NODE_ALLOC(parser, pm_if_node_t);
4796
4797 *node = (pm_if_node_t) {
4798 {
4799 .type = PM_IF_NODE,
4800 .flags = PM_NODE_FLAG_NEWLINE,
4801 .node_id = PM_NODE_IDENTIFY(parser),
4802 .location = {
4803 .start = predicate->location.start,
4804 .end = false_expression->location.end,
4805 },
4806 },
4807 .if_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
4808 .predicate = predicate,
4809 .then_keyword_loc = PM_LOCATION_TOKEN_VALUE(qmark),
4810 .statements = if_statements,
4811 .subsequent = (pm_node_t *) else_node,
4812 .end_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
4813 };
4814
4815 return node;
4816
4817}
4818
4819static inline void
4820pm_if_node_end_keyword_loc_set(pm_if_node_t *node, const pm_token_t *keyword) {
4821 node->base.location.end = keyword->end;
4822 node->end_keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword);
4823}
4824
4825static inline void
4826pm_else_node_end_keyword_loc_set(pm_else_node_t *node, const pm_token_t *keyword) {
4827 node->base.location.end = keyword->end;
4828 node->end_keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword);
4829}
4830
4834static pm_implicit_node_t *
4835pm_implicit_node_create(pm_parser_t *parser, pm_node_t *value) {
4836 pm_implicit_node_t *node = PM_NODE_ALLOC(parser, pm_implicit_node_t);
4837
4838 *node = (pm_implicit_node_t) {
4839 {
4840 .type = PM_IMPLICIT_NODE,
4841 .node_id = PM_NODE_IDENTIFY(parser),
4842 .location = value->location
4843 },
4844 .value = value
4845 };
4846
4847 return node;
4848}
4849
4853static pm_implicit_rest_node_t *
4854pm_implicit_rest_node_create(pm_parser_t *parser, const pm_token_t *token) {
4855 assert(token->type == PM_TOKEN_COMMA);
4856
4857 pm_implicit_rest_node_t *node = PM_NODE_ALLOC(parser, pm_implicit_rest_node_t);
4858
4859 *node = (pm_implicit_rest_node_t) {
4860 {
4861 .type = PM_IMPLICIT_REST_NODE,
4862 .node_id = PM_NODE_IDENTIFY(parser),
4863 .location = PM_LOCATION_TOKEN_VALUE(token)
4864 }
4865 };
4866
4867 return node;
4868}
4869
4873static pm_integer_node_t *
4874pm_integer_node_create(pm_parser_t *parser, pm_node_flags_t base, const pm_token_t *token) {
4875 assert(token->type == PM_TOKEN_INTEGER);
4876 pm_integer_node_t *node = PM_NODE_ALLOC(parser, pm_integer_node_t);
4877
4878 *node = (pm_integer_node_t) {
4879 {
4880 .type = PM_INTEGER_NODE,
4881 .flags = base | PM_NODE_FLAG_STATIC_LITERAL,
4882 .node_id = PM_NODE_IDENTIFY(parser),
4883 .location = PM_LOCATION_TOKEN_VALUE(token)
4884 },
4885 .value = { 0 }
4886 };
4887
4888 pm_integer_base_t integer_base = PM_INTEGER_BASE_DECIMAL;
4889 switch (base) {
4890 case PM_INTEGER_BASE_FLAGS_BINARY: integer_base = PM_INTEGER_BASE_BINARY; break;
4891 case PM_INTEGER_BASE_FLAGS_OCTAL: integer_base = PM_INTEGER_BASE_OCTAL; break;
4892 case PM_INTEGER_BASE_FLAGS_DECIMAL: break;
4893 case PM_INTEGER_BASE_FLAGS_HEXADECIMAL: integer_base = PM_INTEGER_BASE_HEXADECIMAL; break;
4894 default: assert(false && "unreachable"); break;
4895 }
4896
4897 pm_integer_parse(&node->value, integer_base, token->start, token->end);
4898 return node;
4899}
4900
4905static pm_imaginary_node_t *
4906pm_integer_node_imaginary_create(pm_parser_t *parser, pm_node_flags_t base, const pm_token_t *token) {
4907 assert(token->type == PM_TOKEN_INTEGER_IMAGINARY);
4908
4909 pm_imaginary_node_t *node = PM_NODE_ALLOC(parser, pm_imaginary_node_t);
4910 *node = (pm_imaginary_node_t) {
4911 {
4912 .type = PM_IMAGINARY_NODE,
4913 .flags = PM_NODE_FLAG_STATIC_LITERAL,
4914 .node_id = PM_NODE_IDENTIFY(parser),
4915 .location = PM_LOCATION_TOKEN_VALUE(token)
4916 },
4917 .numeric = (pm_node_t *) pm_integer_node_create(parser, base, &((pm_token_t) {
4918 .type = PM_TOKEN_INTEGER,
4919 .start = token->start,
4920 .end = token->end - 1
4921 }))
4922 };
4923
4924 return node;
4925}
4926
4931static pm_rational_node_t *
4932pm_integer_node_rational_create(pm_parser_t *parser, pm_node_flags_t base, const pm_token_t *token) {
4933 assert(token->type == PM_TOKEN_INTEGER_RATIONAL);
4934
4935 pm_rational_node_t *node = PM_NODE_ALLOC(parser, pm_rational_node_t);
4936 *node = (pm_rational_node_t) {
4937 {
4938 .type = PM_RATIONAL_NODE,
4939 .flags = base | PM_NODE_FLAG_STATIC_LITERAL,
4940 .node_id = PM_NODE_IDENTIFY(parser),
4941 .location = PM_LOCATION_TOKEN_VALUE(token)
4942 },
4943 .numerator = { 0 },
4944 .denominator = { .value = 1, 0 }
4945 };
4946
4947 pm_integer_base_t integer_base = PM_INTEGER_BASE_DECIMAL;
4948 switch (base) {
4949 case PM_INTEGER_BASE_FLAGS_BINARY: integer_base = PM_INTEGER_BASE_BINARY; break;
4950 case PM_INTEGER_BASE_FLAGS_OCTAL: integer_base = PM_INTEGER_BASE_OCTAL; break;
4951 case PM_INTEGER_BASE_FLAGS_DECIMAL: break;
4952 case PM_INTEGER_BASE_FLAGS_HEXADECIMAL: integer_base = PM_INTEGER_BASE_HEXADECIMAL; break;
4953 default: assert(false && "unreachable"); break;
4954 }
4955
4956 pm_integer_parse(&node->numerator, integer_base, token->start, token->end - 1);
4957
4958 return node;
4959}
4960
4965static pm_imaginary_node_t *
4966pm_integer_node_rational_imaginary_create(pm_parser_t *parser, pm_node_flags_t base, const pm_token_t *token) {
4967 assert(token->type == PM_TOKEN_INTEGER_RATIONAL_IMAGINARY);
4968
4969 pm_imaginary_node_t *node = PM_NODE_ALLOC(parser, pm_imaginary_node_t);
4970 *node = (pm_imaginary_node_t) {
4971 {
4972 .type = PM_IMAGINARY_NODE,
4973 .flags = PM_NODE_FLAG_STATIC_LITERAL,
4974 .node_id = PM_NODE_IDENTIFY(parser),
4975 .location = PM_LOCATION_TOKEN_VALUE(token)
4976 },
4977 .numeric = (pm_node_t *) pm_integer_node_rational_create(parser, base, &((pm_token_t) {
4978 .type = PM_TOKEN_INTEGER_RATIONAL,
4979 .start = token->start,
4980 .end = token->end - 1
4981 }))
4982 };
4983
4984 return node;
4985}
4986
4990static pm_in_node_t *
4991pm_in_node_create(pm_parser_t *parser, pm_node_t *pattern, pm_statements_node_t *statements, const pm_token_t *in_keyword, const pm_token_t *then_keyword) {
4992 pm_in_node_t *node = PM_NODE_ALLOC(parser, pm_in_node_t);
4993
4994 const uint8_t *end;
4995 if (statements != NULL) {
4996 end = statements->base.location.end;
4997 } else if (then_keyword->type != PM_TOKEN_NOT_PROVIDED) {
4998 end = then_keyword->end;
4999 } else {
5000 end = pattern->location.end;
5001 }
5002
5003 *node = (pm_in_node_t) {
5004 {
5005 .type = PM_IN_NODE,
5006 .node_id = PM_NODE_IDENTIFY(parser),
5007 .location = {
5008 .start = in_keyword->start,
5009 .end = end
5010 },
5011 },
5012 .pattern = pattern,
5013 .statements = statements,
5014 .in_loc = PM_LOCATION_TOKEN_VALUE(in_keyword),
5015 .then_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(then_keyword)
5016 };
5017
5018 return node;
5019}
5020
5024static pm_instance_variable_and_write_node_t *
5025pm_instance_variable_and_write_node_create(pm_parser_t *parser, pm_instance_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
5026 assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
5027 pm_instance_variable_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_instance_variable_and_write_node_t);
5028
5029 *node = (pm_instance_variable_and_write_node_t) {
5030 {
5031 .type = PM_INSTANCE_VARIABLE_AND_WRITE_NODE,
5032 .node_id = PM_NODE_IDENTIFY(parser),
5033 .location = {
5034 .start = target->base.location.start,
5035 .end = value->location.end
5036 }
5037 },
5038 .name = target->name,
5039 .name_loc = target->base.location,
5040 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
5041 .value = value
5042 };
5043
5044 return node;
5045}
5046
5050static pm_instance_variable_operator_write_node_t *
5051pm_instance_variable_operator_write_node_create(pm_parser_t *parser, pm_instance_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
5052 pm_instance_variable_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_instance_variable_operator_write_node_t);
5053
5054 *node = (pm_instance_variable_operator_write_node_t) {
5055 {
5056 .type = PM_INSTANCE_VARIABLE_OPERATOR_WRITE_NODE,
5057 .node_id = PM_NODE_IDENTIFY(parser),
5058 .location = {
5059 .start = target->base.location.start,
5060 .end = value->location.end
5061 }
5062 },
5063 .name = target->name,
5064 .name_loc = target->base.location,
5065 .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
5066 .value = value,
5067 .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
5068 };
5069
5070 return node;
5071}
5072
5076static pm_instance_variable_or_write_node_t *
5077pm_instance_variable_or_write_node_create(pm_parser_t *parser, pm_instance_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
5078 assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
5079 pm_instance_variable_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_instance_variable_or_write_node_t);
5080
5081 *node = (pm_instance_variable_or_write_node_t) {
5082 {
5083 .type = PM_INSTANCE_VARIABLE_OR_WRITE_NODE,
5084 .node_id = PM_NODE_IDENTIFY(parser),
5085 .location = {
5086 .start = target->base.location.start,
5087 .end = value->location.end
5088 }
5089 },
5090 .name = target->name,
5091 .name_loc = target->base.location,
5092 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
5093 .value = value
5094 };
5095
5096 return node;
5097}
5098
5102static pm_instance_variable_read_node_t *
5103pm_instance_variable_read_node_create(pm_parser_t *parser, const pm_token_t *token) {
5104 assert(token->type == PM_TOKEN_INSTANCE_VARIABLE);
5105 pm_instance_variable_read_node_t *node = PM_NODE_ALLOC(parser, pm_instance_variable_read_node_t);
5106
5107 *node = (pm_instance_variable_read_node_t) {
5108 {
5109 .type = PM_INSTANCE_VARIABLE_READ_NODE,
5110 .node_id = PM_NODE_IDENTIFY(parser),
5111 .location = PM_LOCATION_TOKEN_VALUE(token)
5112 },
5113 .name = pm_parser_constant_id_token(parser, token)
5114 };
5115
5116 return node;
5117}
5118
5123static pm_instance_variable_write_node_t *
5124pm_instance_variable_write_node_create(pm_parser_t *parser, pm_instance_variable_read_node_t *read_node, pm_token_t *operator, pm_node_t *value) {
5125 pm_instance_variable_write_node_t *node = PM_NODE_ALLOC(parser, pm_instance_variable_write_node_t);
5126 *node = (pm_instance_variable_write_node_t) {
5127 {
5128 .type = PM_INSTANCE_VARIABLE_WRITE_NODE,
5129 .flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY),
5130 .node_id = PM_NODE_IDENTIFY(parser),
5131 .location = {
5132 .start = read_node->base.location.start,
5133 .end = value->location.end
5134 }
5135 },
5136 .name = read_node->name,
5137 .name_loc = PM_LOCATION_NODE_BASE_VALUE(read_node),
5138 .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator),
5139 .value = value
5140 };
5141
5142 return node;
5143}
5144
5150static void
5151pm_interpolated_node_append(pm_node_t *node, pm_node_list_t *parts, pm_node_t *part) {
5152 switch (PM_NODE_TYPE(part)) {
5153 case PM_STRING_NODE:
5154 pm_node_flag_set(part, PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN);
5155 break;
5156 case PM_EMBEDDED_STATEMENTS_NODE: {
5157 pm_embedded_statements_node_t *cast = (pm_embedded_statements_node_t *) part;
5158 pm_node_t *embedded = (cast->statements != NULL && cast->statements->body.size == 1) ? cast->statements->body.nodes[0] : NULL;
5159
5160 if (embedded == NULL) {
5161 // If there are no statements or more than one statement, then
5162 // we lose the static literal flag.
5163 pm_node_flag_unset(node, PM_NODE_FLAG_STATIC_LITERAL);
5164 } else if (PM_NODE_TYPE_P(embedded, PM_STRING_NODE)) {
5165 // If the embedded statement is a string, then we can keep the
5166 // static literal flag and mark the string as frozen.
5167 pm_node_flag_set(embedded, PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN);
5168 } else if (PM_NODE_TYPE_P(embedded, PM_INTERPOLATED_STRING_NODE) && PM_NODE_FLAG_P(embedded, PM_NODE_FLAG_STATIC_LITERAL)) {
5169 // If the embedded statement is an interpolated string and it's
5170 // a static literal, then we can keep the static literal flag.
5171 } else {
5172 // Otherwise we lose the static literal flag.
5173 pm_node_flag_unset(node, PM_NODE_FLAG_STATIC_LITERAL);
5174 }
5175
5176 break;
5177 }
5178 case PM_EMBEDDED_VARIABLE_NODE:
5179 pm_node_flag_unset((pm_node_t *) node, PM_NODE_FLAG_STATIC_LITERAL);
5180 break;
5181 default:
5182 assert(false && "unexpected node type");
5183 break;
5184 }
5185
5186 pm_node_list_append(parts, part);
5187}
5188
5192static pm_interpolated_regular_expression_node_t *
5193pm_interpolated_regular_expression_node_create(pm_parser_t *parser, const pm_token_t *opening) {
5194 pm_interpolated_regular_expression_node_t *node = PM_NODE_ALLOC(parser, pm_interpolated_regular_expression_node_t);
5195
5196 *node = (pm_interpolated_regular_expression_node_t) {
5197 {
5198 .type = PM_INTERPOLATED_REGULAR_EXPRESSION_NODE,
5199 .flags = PM_NODE_FLAG_STATIC_LITERAL,
5200 .node_id = PM_NODE_IDENTIFY(parser),
5201 .location = {
5202 .start = opening->start,
5203 .end = NULL,
5204 },
5205 },
5206 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
5207 .closing_loc = PM_LOCATION_TOKEN_VALUE(opening),
5208 .parts = { 0 }
5209 };
5210
5211 return node;
5212}
5213
5214static inline void
5215pm_interpolated_regular_expression_node_append(pm_interpolated_regular_expression_node_t *node, pm_node_t *part) {
5216 if (node->base.location.start > part->location.start) {
5217 node->base.location.start = part->location.start;
5218 }
5219 if (node->base.location.end < part->location.end) {
5220 node->base.location.end = part->location.end;
5221 }
5222
5223 pm_interpolated_node_append((pm_node_t *) node, &node->parts, part);
5224}
5225
5226static inline void
5227pm_interpolated_regular_expression_node_closing_set(pm_parser_t *parser, pm_interpolated_regular_expression_node_t *node, const pm_token_t *closing) {
5228 node->closing_loc = PM_LOCATION_TOKEN_VALUE(closing);
5229 node->base.location.end = closing->end;
5230 pm_node_flag_set((pm_node_t *) node, pm_regular_expression_flags_create(parser, closing));
5231}
5232
5256static inline void
5257pm_interpolated_string_node_append(pm_interpolated_string_node_t *node, pm_node_t *part) {
5258#define CLEAR_FLAGS(node) \
5259 node->base.flags = (pm_node_flags_t) (node->base.flags & ~(PM_NODE_FLAG_STATIC_LITERAL | PM_INTERPOLATED_STRING_NODE_FLAGS_FROZEN | PM_INTERPOLATED_STRING_NODE_FLAGS_MUTABLE))
5260
5261#define MUTABLE_FLAGS(node) \
5262 node->base.flags = (pm_node_flags_t) ((node->base.flags | PM_INTERPOLATED_STRING_NODE_FLAGS_MUTABLE) & ~PM_INTERPOLATED_STRING_NODE_FLAGS_FROZEN);
5263
5264 if (node->parts.size == 0 && node->opening_loc.start == NULL) {
5265 node->base.location.start = part->location.start;
5266 }
5267
5268 node->base.location.end = MAX(node->base.location.end, part->location.end);
5269
5270 switch (PM_NODE_TYPE(part)) {
5271 case PM_STRING_NODE:
5272 part->flags = (pm_node_flags_t) ((part->flags | PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN) & ~PM_STRING_FLAGS_MUTABLE);
5273 break;
5274 case PM_INTERPOLATED_STRING_NODE:
5275 if (PM_NODE_FLAG_P(part, PM_NODE_FLAG_STATIC_LITERAL)) {
5276 // If the string that we're concatenating is a static literal,
5277 // then we can keep the static literal flag for this string.
5278 } else {
5279 // Otherwise, we lose the static literal flag here and we should
5280 // also clear the mutability flags.
5281 CLEAR_FLAGS(node);
5282 }
5283 break;
5284 case PM_EMBEDDED_STATEMENTS_NODE: {
5285 pm_embedded_statements_node_t *cast = (pm_embedded_statements_node_t *) part;
5286 pm_node_t *embedded = (cast->statements != NULL && cast->statements->body.size == 1) ? cast->statements->body.nodes[0] : NULL;
5287
5288 if (embedded == NULL) {
5289 // If we're embedding multiple statements or no statements, then
5290 // the string is not longer a static literal.
5291 CLEAR_FLAGS(node);
5292 } else if (PM_NODE_TYPE_P(embedded, PM_STRING_NODE)) {
5293 // If the embedded statement is a string, then we can make that
5294 // string as frozen and static literal, and not touch the static
5295 // literal status of this string.
5296 embedded->flags = (pm_node_flags_t) ((embedded->flags | PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN) & ~PM_STRING_FLAGS_MUTABLE);
5297
5298 if (PM_NODE_FLAG_P(node, PM_NODE_FLAG_STATIC_LITERAL)) {
5299 MUTABLE_FLAGS(node);
5300 }
5301 } else if (PM_NODE_TYPE_P(embedded, PM_INTERPOLATED_STRING_NODE) && PM_NODE_FLAG_P(embedded, PM_NODE_FLAG_STATIC_LITERAL)) {
5302 // If the embedded statement is an interpolated string, but that
5303 // string is marked as static literal, then we can keep our
5304 // static literal status for this string.
5305 if (PM_NODE_FLAG_P(node, PM_NODE_FLAG_STATIC_LITERAL)) {
5306 MUTABLE_FLAGS(node);
5307 }
5308 } else {
5309 // In all other cases, we lose the static literal flag here and
5310 // become mutable.
5311 CLEAR_FLAGS(node);
5312 }
5313
5314 break;
5315 }
5316 case PM_EMBEDDED_VARIABLE_NODE:
5317 // Embedded variables clear static literal, which means we also
5318 // should clear the mutability flags.
5319 CLEAR_FLAGS(node);
5320 break;
5321 default:
5322 assert(false && "unexpected node type");
5323 break;
5324 }
5325
5326 pm_node_list_append(&node->parts, part);
5327
5328#undef CLEAR_FLAGS
5329#undef MUTABLE_FLAGS
5330}
5331
5335static pm_interpolated_string_node_t *
5336pm_interpolated_string_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_node_list_t *parts, const pm_token_t *closing) {
5337 pm_interpolated_string_node_t *node = PM_NODE_ALLOC(parser, pm_interpolated_string_node_t);
5338 pm_node_flags_t flags = PM_NODE_FLAG_STATIC_LITERAL;
5339
5340 switch (parser->frozen_string_literal) {
5341 case PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED:
5342 flags |= PM_INTERPOLATED_STRING_NODE_FLAGS_MUTABLE;
5343 break;
5344 case PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED:
5345 flags |= PM_INTERPOLATED_STRING_NODE_FLAGS_FROZEN;
5346 break;
5347 }
5348
5349 *node = (pm_interpolated_string_node_t) {
5350 {
5351 .type = PM_INTERPOLATED_STRING_NODE,
5352 .flags = flags,
5353 .node_id = PM_NODE_IDENTIFY(parser),
5354 .location = {
5355 .start = opening->start,
5356 .end = closing->end,
5357 },
5358 },
5359 .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
5360 .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
5361 .parts = { 0 }
5362 };
5363
5364 if (parts != NULL) {
5365 pm_node_t *part;
5366 PM_NODE_LIST_FOREACH(parts, index, part) {
5367 pm_interpolated_string_node_append(node, part);
5368 }
5369 }
5370
5371 return node;
5372}
5373
5377static void
5378pm_interpolated_string_node_closing_set(pm_interpolated_string_node_t *node, const pm_token_t *closing) {
5379 node->closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing);
5380 node->base.location.end = closing->end;
5381}
5382
5383static void
5384pm_interpolated_symbol_node_append(pm_interpolated_symbol_node_t *node, pm_node_t *part) {
5385 if (node->parts.size == 0 && node->opening_loc.start == NULL) {
5386 node->base.location.start = part->location.start;
5387 }
5388
5389 pm_interpolated_node_append((pm_node_t *) node, &node->parts, part);
5390 node->base.location.end = MAX(node->base.location.end, part->location.end);
5391}
5392
5393static void
5394pm_interpolated_symbol_node_closing_loc_set(pm_interpolated_symbol_node_t *node, const pm_token_t *closing) {
5395 node->closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing);
5396 node->base.location.end = closing->end;
5397}
5398
5402static pm_interpolated_symbol_node_t *
5403pm_interpolated_symbol_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_node_list_t *parts, const pm_token_t *closing) {
5404 pm_interpolated_symbol_node_t *node = PM_NODE_ALLOC(parser, pm_interpolated_symbol_node_t);
5405
5406 *node = (pm_interpolated_symbol_node_t) {
5407 {
5408 .type = PM_INTERPOLATED_SYMBOL_NODE,
5409 .flags = PM_NODE_FLAG_STATIC_LITERAL,
5410 .node_id = PM_NODE_IDENTIFY(parser),
5411 .location = {
5412 .start = opening->start,
5413 .end = closing->end,
5414 },
5415 },
5416 .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
5417 .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
5418 .parts = { 0 }
5419 };
5420
5421 if (parts != NULL) {
5422 pm_node_t *part;
5423 PM_NODE_LIST_FOREACH(parts, index, part) {
5424 pm_interpolated_symbol_node_append(node, part);
5425 }
5426 }
5427
5428 return node;
5429}
5430
5434static pm_interpolated_x_string_node_t *
5435pm_interpolated_xstring_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *closing) {
5436 pm_interpolated_x_string_node_t *node = PM_NODE_ALLOC(parser, pm_interpolated_x_string_node_t);
5437
5438 *node = (pm_interpolated_x_string_node_t) {
5439 {
5440 .type = PM_INTERPOLATED_X_STRING_NODE,
5441 .node_id = PM_NODE_IDENTIFY(parser),
5442 .location = {
5443 .start = opening->start,
5444 .end = closing->end
5445 },
5446 },
5447 .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
5448 .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
5449 .parts = { 0 }
5450 };
5451
5452 return node;
5453}
5454
5455static inline void
5456pm_interpolated_xstring_node_append(pm_interpolated_x_string_node_t *node, pm_node_t *part) {
5457 pm_interpolated_node_append((pm_node_t *) node, &node->parts, part);
5458 node->base.location.end = part->location.end;
5459}
5460
5461static inline void
5462pm_interpolated_xstring_node_closing_set(pm_interpolated_x_string_node_t *node, const pm_token_t *closing) {
5463 node->closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing);
5464 node->base.location.end = closing->end;
5465}
5466
5470static pm_it_local_variable_read_node_t *
5471pm_it_local_variable_read_node_create(pm_parser_t *parser, const pm_token_t *name) {
5472 pm_it_local_variable_read_node_t *node = PM_NODE_ALLOC(parser, pm_it_local_variable_read_node_t);
5473
5474 *node = (pm_it_local_variable_read_node_t) {
5475 {
5476 .type = PM_IT_LOCAL_VARIABLE_READ_NODE,
5477 .node_id = PM_NODE_IDENTIFY(parser),
5478 .location = PM_LOCATION_TOKEN_VALUE(name)
5479 }
5480 };
5481
5482 return node;
5483}
5484
5488static pm_it_parameters_node_t *
5489pm_it_parameters_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *closing) {
5490 pm_it_parameters_node_t *node = PM_NODE_ALLOC(parser, pm_it_parameters_node_t);
5491
5492 *node = (pm_it_parameters_node_t) {
5493 {
5494 .type = PM_IT_PARAMETERS_NODE,
5495 .node_id = PM_NODE_IDENTIFY(parser),
5496 .location = {
5497 .start = opening->start,
5498 .end = closing->end
5499 }
5500 }
5501 };
5502
5503 return node;
5504}
5505
5509static pm_keyword_hash_node_t *
5510pm_keyword_hash_node_create(pm_parser_t *parser) {
5511 pm_keyword_hash_node_t *node = PM_NODE_ALLOC(parser, pm_keyword_hash_node_t);
5512
5513 *node = (pm_keyword_hash_node_t) {
5514 .base = {
5515 .type = PM_KEYWORD_HASH_NODE,
5516 .flags = PM_KEYWORD_HASH_NODE_FLAGS_SYMBOL_KEYS,
5517 .node_id = PM_NODE_IDENTIFY(parser),
5518 .location = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
5519 },
5520 .elements = { 0 }
5521 };
5522
5523 return node;
5524}
5525
5529static void
5530pm_keyword_hash_node_elements_append(pm_keyword_hash_node_t *hash, pm_node_t *element) {
5531 // If the element being added is not an AssocNode or does not have a symbol
5532 // key, then we want to turn the SYMBOL_KEYS flag off.
5533 if (!PM_NODE_TYPE_P(element, PM_ASSOC_NODE) || !PM_NODE_TYPE_P(((pm_assoc_node_t *) element)->key, PM_SYMBOL_NODE)) {
5534 pm_node_flag_unset((pm_node_t *)hash, PM_KEYWORD_HASH_NODE_FLAGS_SYMBOL_KEYS);
5535 }
5536
5537 pm_node_list_append(&hash->elements, element);
5538 if (hash->base.location.start == NULL) {
5539 hash->base.location.start = element->location.start;
5540 }
5541 hash->base.location.end = element->location.end;
5542}
5543
5547static pm_required_keyword_parameter_node_t *
5548pm_required_keyword_parameter_node_create(pm_parser_t *parser, const pm_token_t *name) {
5549 pm_required_keyword_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_required_keyword_parameter_node_t);
5550
5551 *node = (pm_required_keyword_parameter_node_t) {
5552 {
5553 .type = PM_REQUIRED_KEYWORD_PARAMETER_NODE,
5554 .node_id = PM_NODE_IDENTIFY(parser),
5555 .location = {
5556 .start = name->start,
5557 .end = name->end
5558 },
5559 },
5560 .name = pm_parser_constant_id_location(parser, name->start, name->end - 1),
5561 .name_loc = PM_LOCATION_TOKEN_VALUE(name),
5562 };
5563
5564 return node;
5565}
5566
5570static pm_optional_keyword_parameter_node_t *
5571pm_optional_keyword_parameter_node_create(pm_parser_t *parser, const pm_token_t *name, pm_node_t *value) {
5572 pm_optional_keyword_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_optional_keyword_parameter_node_t);
5573
5574 *node = (pm_optional_keyword_parameter_node_t) {
5575 {
5576 .type = PM_OPTIONAL_KEYWORD_PARAMETER_NODE,
5577 .node_id = PM_NODE_IDENTIFY(parser),
5578 .location = {
5579 .start = name->start,
5580 .end = value->location.end
5581 },
5582 },
5583 .name = pm_parser_constant_id_location(parser, name->start, name->end - 1),
5584 .name_loc = PM_LOCATION_TOKEN_VALUE(name),
5585 .value = value
5586 };
5587
5588 return node;
5589}
5590
5594static pm_keyword_rest_parameter_node_t *
5595pm_keyword_rest_parameter_node_create(pm_parser_t *parser, const pm_token_t *operator, const pm_token_t *name) {
5596 pm_keyword_rest_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_keyword_rest_parameter_node_t);
5597
5598 *node = (pm_keyword_rest_parameter_node_t) {
5599 {
5600 .type = PM_KEYWORD_REST_PARAMETER_NODE,
5601 .node_id = PM_NODE_IDENTIFY(parser),
5602 .location = {
5603 .start = operator->start,
5604 .end = (name->type == PM_TOKEN_NOT_PROVIDED ? operator->end : name->end)
5605 },
5606 },
5607 .name = pm_parser_optional_constant_id_token(parser, name),
5608 .name_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(name),
5609 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
5610 };
5611
5612 return node;
5613}
5614
5618static pm_lambda_node_t *
5619pm_lambda_node_create(
5620 pm_parser_t *parser,
5621 pm_constant_id_list_t *locals,
5622 const pm_token_t *operator,
5623 const pm_token_t *opening,
5624 const pm_token_t *closing,
5625 pm_node_t *parameters,
5626 pm_node_t *body
5627) {
5628 pm_lambda_node_t *node = PM_NODE_ALLOC(parser, pm_lambda_node_t);
5629
5630 *node = (pm_lambda_node_t) {
5631 {
5632 .type = PM_LAMBDA_NODE,
5633 .node_id = PM_NODE_IDENTIFY(parser),
5634 .location = {
5635 .start = operator->start,
5636 .end = closing->end
5637 },
5638 },
5639 .locals = *locals,
5640 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
5641 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
5642 .closing_loc = PM_LOCATION_TOKEN_VALUE(closing),
5643 .parameters = parameters,
5644 .body = body
5645 };
5646
5647 return node;
5648}
5649
5653static pm_local_variable_and_write_node_t *
5654pm_local_variable_and_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value, pm_constant_id_t name, uint32_t depth) {
5655 assert(PM_NODE_TYPE_P(target, PM_LOCAL_VARIABLE_READ_NODE) || PM_NODE_TYPE_P(target, PM_CALL_NODE));
5656 assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
5657 pm_local_variable_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_local_variable_and_write_node_t);
5658
5659 *node = (pm_local_variable_and_write_node_t) {
5660 {
5661 .type = PM_LOCAL_VARIABLE_AND_WRITE_NODE,
5662 .node_id = PM_NODE_IDENTIFY(parser),
5663 .location = {
5664 .start = target->location.start,
5665 .end = value->location.end
5666 }
5667 },
5668 .name_loc = target->location,
5669 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
5670 .value = value,
5671 .name = name,
5672 .depth = depth
5673 };
5674
5675 return node;
5676}
5677
5681static pm_local_variable_operator_write_node_t *
5682pm_local_variable_operator_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value, pm_constant_id_t name, uint32_t depth) {
5683 pm_local_variable_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_local_variable_operator_write_node_t);
5684
5685 *node = (pm_local_variable_operator_write_node_t) {
5686 {
5687 .type = PM_LOCAL_VARIABLE_OPERATOR_WRITE_NODE,
5688 .node_id = PM_NODE_IDENTIFY(parser),
5689 .location = {
5690 .start = target->location.start,
5691 .end = value->location.end
5692 }
5693 },
5694 .name_loc = target->location,
5695 .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
5696 .value = value,
5697 .name = name,
5698 .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1),
5699 .depth = depth
5700 };
5701
5702 return node;
5703}
5704
5708static pm_local_variable_or_write_node_t *
5709pm_local_variable_or_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value, pm_constant_id_t name, uint32_t depth) {
5710 assert(PM_NODE_TYPE_P(target, PM_LOCAL_VARIABLE_READ_NODE) || PM_NODE_TYPE_P(target, PM_CALL_NODE));
5711 assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
5712 pm_local_variable_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_local_variable_or_write_node_t);
5713
5714 *node = (pm_local_variable_or_write_node_t) {
5715 {
5716 .type = PM_LOCAL_VARIABLE_OR_WRITE_NODE,
5717 .node_id = PM_NODE_IDENTIFY(parser),
5718 .location = {
5719 .start = target->location.start,
5720 .end = value->location.end
5721 }
5722 },
5723 .name_loc = target->location,
5724 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
5725 .value = value,
5726 .name = name,
5727 .depth = depth
5728 };
5729
5730 return node;
5731}
5732
5736static pm_local_variable_read_node_t *
5737pm_local_variable_read_node_create_constant_id(pm_parser_t *parser, const pm_token_t *name, pm_constant_id_t name_id, uint32_t depth, bool missing) {
5738 if (!missing) pm_locals_read(&pm_parser_scope_find(parser, depth)->locals, name_id);
5739
5740 pm_local_variable_read_node_t *node = PM_NODE_ALLOC(parser, pm_local_variable_read_node_t);
5741
5742 *node = (pm_local_variable_read_node_t) {
5743 {
5744 .type = PM_LOCAL_VARIABLE_READ_NODE,
5745 .node_id = PM_NODE_IDENTIFY(parser),
5746 .location = PM_LOCATION_TOKEN_VALUE(name)
5747 },
5748 .name = name_id,
5749 .depth = depth
5750 };
5751
5752 return node;
5753}
5754
5758static pm_local_variable_read_node_t *
5759pm_local_variable_read_node_create(pm_parser_t *parser, const pm_token_t *name, uint32_t depth) {
5760 pm_constant_id_t name_id = pm_parser_constant_id_token(parser, name);
5761 return pm_local_variable_read_node_create_constant_id(parser, name, name_id, depth, false);
5762}
5763
5768static pm_local_variable_read_node_t *
5769pm_local_variable_read_node_missing_create(pm_parser_t *parser, const pm_token_t *name, uint32_t depth) {
5770 pm_constant_id_t name_id = pm_parser_constant_id_token(parser, name);
5771 return pm_local_variable_read_node_create_constant_id(parser, name, name_id, depth, true);
5772}
5773
5777static pm_local_variable_write_node_t *
5778pm_local_variable_write_node_create(pm_parser_t *parser, pm_constant_id_t name, uint32_t depth, pm_node_t *value, const pm_location_t *name_loc, const pm_token_t *operator) {
5779 pm_local_variable_write_node_t *node = PM_NODE_ALLOC(parser, pm_local_variable_write_node_t);
5780
5781 *node = (pm_local_variable_write_node_t) {
5782 {
5783 .type = PM_LOCAL_VARIABLE_WRITE_NODE,
5784 .flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY),
5785 .node_id = PM_NODE_IDENTIFY(parser),
5786 .location = {
5787 .start = name_loc->start,
5788 .end = value->location.end
5789 }
5790 },
5791 .name = name,
5792 .depth = depth,
5793 .value = value,
5794 .name_loc = *name_loc,
5795 .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator)
5796 };
5797
5798 return node;
5799}
5800
/**
 * Returns true if the bytes in the range [start, end) are exactly `it`.
 */
static inline bool
pm_token_is_it(const uint8_t *start, const uint8_t *end) {
    // Anything that is not exactly two bytes long cannot match, and must not
    // be indexed below.
    if (end - start != 2) return false;

    return start[0] == 'i' && start[1] == 't';
}
5808
/**
 * Returns true if the bytes in the range [start, end) are exactly two bytes
 * long: an underscore followed by a decimal digit other than `0`.
 */
static inline bool
pm_token_is_numbered_parameter(const uint8_t *start, const uint8_t *end) {
    // Check the length first so that the bytes are only read when present.
    if (end - start != 2 || start[0] != '_') return false;

    const uint8_t digit = start[1];
    return (digit != '0') && (pm_char_is_decimal_digit(digit));
}
5817
5822static inline void
5823pm_refute_numbered_parameter(pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
5824 if (pm_token_is_numbered_parameter(start, end)) {
5825 PM_PARSER_ERR_FORMAT(parser, start, end, PM_ERR_PARAMETER_NUMBERED_RESERVED, start);
5826 }
5827}
5828
5833static pm_local_variable_target_node_t *
5834pm_local_variable_target_node_create(pm_parser_t *parser, const pm_location_t *location, pm_constant_id_t name, uint32_t depth) {
5835 pm_refute_numbered_parameter(parser, location->start, location->end);
5836 pm_local_variable_target_node_t *node = PM_NODE_ALLOC(parser, pm_local_variable_target_node_t);
5837
5838 *node = (pm_local_variable_target_node_t) {
5839 {
5840 .type = PM_LOCAL_VARIABLE_TARGET_NODE,
5841 .node_id = PM_NODE_IDENTIFY(parser),
5842 .location = *location
5843 },
5844 .name = name,
5845 .depth = depth
5846 };
5847
5848 return node;
5849}
5850
5854static pm_match_predicate_node_t *
5855pm_match_predicate_node_create(pm_parser_t *parser, pm_node_t *value, pm_node_t *pattern, const pm_token_t *operator) {
5856 pm_assert_value_expression(parser, value);
5857
5858 pm_match_predicate_node_t *node = PM_NODE_ALLOC(parser, pm_match_predicate_node_t);
5859
5860 *node = (pm_match_predicate_node_t) {
5861 {
5862 .type = PM_MATCH_PREDICATE_NODE,
5863 .node_id = PM_NODE_IDENTIFY(parser),
5864 .location = {
5865 .start = value->location.start,
5866 .end = pattern->location.end
5867 }
5868 },
5869 .value = value,
5870 .pattern = pattern,
5871 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
5872 };
5873
5874 return node;
5875}
5876
5880static pm_match_required_node_t *
5881pm_match_required_node_create(pm_parser_t *parser, pm_node_t *value, pm_node_t *pattern, const pm_token_t *operator) {
5882 pm_assert_value_expression(parser, value);
5883
5884 pm_match_required_node_t *node = PM_NODE_ALLOC(parser, pm_match_required_node_t);
5885
5886 *node = (pm_match_required_node_t) {
5887 {
5888 .type = PM_MATCH_REQUIRED_NODE,
5889 .node_id = PM_NODE_IDENTIFY(parser),
5890 .location = {
5891 .start = value->location.start,
5892 .end = pattern->location.end
5893 }
5894 },
5895 .value = value,
5896 .pattern = pattern,
5897 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
5898 };
5899
5900 return node;
5901}
5902
5906static pm_match_write_node_t *
5907pm_match_write_node_create(pm_parser_t *parser, pm_call_node_t *call) {
5908 pm_match_write_node_t *node = PM_NODE_ALLOC(parser, pm_match_write_node_t);
5909
5910 *node = (pm_match_write_node_t) {
5911 {
5912 .type = PM_MATCH_WRITE_NODE,
5913 .node_id = PM_NODE_IDENTIFY(parser),
5914 .location = call->base.location
5915 },
5916 .call = call,
5917 .targets = { 0 }
5918 };
5919
5920 return node;
5921}
5922
5926static pm_module_node_t *
5927pm_module_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, const pm_token_t *module_keyword, pm_node_t *constant_path, const pm_token_t *name, pm_node_t *body, const pm_token_t *end_keyword) {
5928 pm_module_node_t *node = PM_NODE_ALLOC(parser, pm_module_node_t);
5929
5930 *node = (pm_module_node_t) {
5931 {
5932 .type = PM_MODULE_NODE,
5933 .node_id = PM_NODE_IDENTIFY(parser),
5934 .location = {
5935 .start = module_keyword->start,
5936 .end = end_keyword->end
5937 }
5938 },
5939 .locals = (locals == NULL ? ((pm_constant_id_list_t) { .ids = NULL, .size = 0, .capacity = 0 }) : *locals),
5940 .module_keyword_loc = PM_LOCATION_TOKEN_VALUE(module_keyword),
5941 .constant_path = constant_path,
5942 .body = body,
5943 .end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword),
5944 .name = pm_parser_constant_id_token(parser, name)
5945 };
5946
5947 return node;
5948}
5949
5953static pm_multi_target_node_t *
5954pm_multi_target_node_create(pm_parser_t *parser) {
5955 pm_multi_target_node_t *node = PM_NODE_ALLOC(parser, pm_multi_target_node_t);
5956
5957 *node = (pm_multi_target_node_t) {
5958 {
5959 .type = PM_MULTI_TARGET_NODE,
5960 .node_id = PM_NODE_IDENTIFY(parser),
5961 .location = { .start = NULL, .end = NULL }
5962 },
5963 .lefts = { 0 },
5964 .rest = NULL,
5965 .rights = { 0 },
5966 .lparen_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
5967 .rparen_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
5968 };
5969
5970 return node;
5971}
5972
5976static void
5977pm_multi_target_node_targets_append(pm_parser_t *parser, pm_multi_target_node_t *node, pm_node_t *target) {
5978 if (PM_NODE_TYPE_P(target, PM_SPLAT_NODE)) {
5979 if (node->rest == NULL) {
5980 node->rest = target;
5981 } else {
5982 pm_parser_err_node(parser, target, PM_ERR_MULTI_ASSIGN_MULTI_SPLATS);
5983 pm_node_list_append(&node->rights, target);
5984 }
5985 } else if (PM_NODE_TYPE_P(target, PM_IMPLICIT_REST_NODE)) {
5986 if (node->rest == NULL) {
5987 node->rest = target;
5988 } else {
5989 PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, parser->current, PM_ERR_MULTI_ASSIGN_UNEXPECTED_REST);
5990 pm_node_list_append(&node->rights, target);
5991 }
5992 } else if (node->rest == NULL) {
5993 pm_node_list_append(&node->lefts, target);
5994 } else {
5995 pm_node_list_append(&node->rights, target);
5996 }
5997
5998 if (node->base.location.start == NULL || (node->base.location.start > target->location.start)) {
5999 node->base.location.start = target->location.start;
6000 }
6001
6002 if (node->base.location.end == NULL || (node->base.location.end < target->location.end)) {
6003 node->base.location.end = target->location.end;
6004 }
6005}
6006
6010static void
6011pm_multi_target_node_opening_set(pm_multi_target_node_t *node, const pm_token_t *lparen) {
6012 node->base.location.start = lparen->start;
6013 node->lparen_loc = PM_LOCATION_TOKEN_VALUE(lparen);
6014}
6015
6019static void
6020pm_multi_target_node_closing_set(pm_multi_target_node_t *node, const pm_token_t *rparen) {
6021 node->base.location.end = rparen->end;
6022 node->rparen_loc = PM_LOCATION_TOKEN_VALUE(rparen);
6023}
6024
6028static pm_multi_write_node_t *
6029pm_multi_write_node_create(pm_parser_t *parser, pm_multi_target_node_t *target, const pm_token_t *operator, pm_node_t *value) {
6030 pm_multi_write_node_t *node = PM_NODE_ALLOC(parser, pm_multi_write_node_t);
6031
6032 *node = (pm_multi_write_node_t) {
6033 {
6034 .type = PM_MULTI_WRITE_NODE,
6035 .flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY),
6036 .node_id = PM_NODE_IDENTIFY(parser),
6037 .location = {
6038 .start = target->base.location.start,
6039 .end = value->location.end
6040 }
6041 },
6042 .lefts = target->lefts,
6043 .rest = target->rest,
6044 .rights = target->rights,
6045 .lparen_loc = target->lparen_loc,
6046 .rparen_loc = target->rparen_loc,
6047 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
6048 .value = value
6049 };
6050
6051 // Explicitly do not call pm_node_destroy here because we want to keep
6052 // around all of the information within the MultiWriteNode node.
6053 xfree(target);
6054
6055 return node;
6056}
6057
6061static pm_next_node_t *
6062pm_next_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_arguments_node_t *arguments) {
6063 assert(keyword->type == PM_TOKEN_KEYWORD_NEXT);
6064 pm_next_node_t *node = PM_NODE_ALLOC(parser, pm_next_node_t);
6065
6066 *node = (pm_next_node_t) {
6067 {
6068 .type = PM_NEXT_NODE,
6069 .node_id = PM_NODE_IDENTIFY(parser),
6070 .location = {
6071 .start = keyword->start,
6072 .end = (arguments == NULL ? keyword->end : arguments->base.location.end)
6073 }
6074 },
6075 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
6076 .arguments = arguments
6077 };
6078
6079 return node;
6080}
6081
6085static pm_nil_node_t *
6086pm_nil_node_create(pm_parser_t *parser, const pm_token_t *token) {
6087 assert(token->type == PM_TOKEN_KEYWORD_NIL);
6088 pm_nil_node_t *node = PM_NODE_ALLOC(parser, pm_nil_node_t);
6089
6090 *node = (pm_nil_node_t) {{
6091 .type = PM_NIL_NODE,
6092 .flags = PM_NODE_FLAG_STATIC_LITERAL,
6093 .node_id = PM_NODE_IDENTIFY(parser),
6094 .location = PM_LOCATION_TOKEN_VALUE(token)
6095 }};
6096
6097 return node;
6098}
6099
6103static pm_no_keywords_parameter_node_t *
6104pm_no_keywords_parameter_node_create(pm_parser_t *parser, const pm_token_t *operator, const pm_token_t *keyword) {
6105 assert(operator->type == PM_TOKEN_USTAR_STAR || operator->type == PM_TOKEN_STAR_STAR);
6106 assert(keyword->type == PM_TOKEN_KEYWORD_NIL);
6107 pm_no_keywords_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_no_keywords_parameter_node_t);
6108
6109 *node = (pm_no_keywords_parameter_node_t) {
6110 {
6111 .type = PM_NO_KEYWORDS_PARAMETER_NODE,
6112 .node_id = PM_NODE_IDENTIFY(parser),
6113 .location = {
6114 .start = operator->start,
6115 .end = keyword->end
6116 }
6117 },
6118 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
6119 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword)
6120 };
6121
6122 return node;
6123}
6124
6128static pm_numbered_parameters_node_t *
6129pm_numbered_parameters_node_create(pm_parser_t *parser, const pm_location_t *location, uint8_t maximum) {
6130 pm_numbered_parameters_node_t *node = PM_NODE_ALLOC(parser, pm_numbered_parameters_node_t);
6131
6132 *node = (pm_numbered_parameters_node_t) {
6133 {
6134 .type = PM_NUMBERED_PARAMETERS_NODE,
6135 .node_id = PM_NODE_IDENTIFY(parser),
6136 .location = *location
6137 },
6138 .maximum = maximum
6139 };
6140
6141 return node;
6142}
6143
6148#define NTH_REF_MAX ((uint32_t) (INT_MAX >> 1))
6149
6156static uint32_t
6157pm_numbered_reference_read_node_number(pm_parser_t *parser, const pm_token_t *token) {
6158 const uint8_t *start = token->start + 1;
6159 const uint8_t *end = token->end;
6160
6161 ptrdiff_t diff = end - start;
6162 assert(diff > 0 && ((unsigned long) diff < SIZE_MAX));
6163 size_t length = (size_t) diff;
6164
6165 char *digits = xcalloc(length + 1, sizeof(char));
6166 memcpy(digits, start, length);
6167 digits[length] = '\0';
6168
6169 char *endptr;
6170 errno = 0;
6171 unsigned long value = strtoul(digits, &endptr, 10);
6172
6173 if ((digits == endptr) || (*endptr != '\0')) {
6174 pm_parser_err(parser, start, end, PM_ERR_INVALID_NUMBER_DECIMAL);
6175 value = 0;
6176 }
6177
6178 xfree(digits);
6179
6180 if ((errno == ERANGE) || (value > NTH_REF_MAX)) {
6181 PM_PARSER_WARN_FORMAT(parser, start, end, PM_WARN_INVALID_NUMBERED_REFERENCE, (int) (length + 1), (const char *) token->start);
6182 value = 0;
6183 }
6184
6185 return (uint32_t) value;
6186}
6187
6188#undef NTH_REF_MAX
6189
6193static pm_numbered_reference_read_node_t *
6194pm_numbered_reference_read_node_create(pm_parser_t *parser, const pm_token_t *name) {
6195 assert(name->type == PM_TOKEN_NUMBERED_REFERENCE);
6196 pm_numbered_reference_read_node_t *node = PM_NODE_ALLOC(parser, pm_numbered_reference_read_node_t);
6197
6198 *node = (pm_numbered_reference_read_node_t) {
6199 {
6200 .type = PM_NUMBERED_REFERENCE_READ_NODE,
6201 .node_id = PM_NODE_IDENTIFY(parser),
6202 .location = PM_LOCATION_TOKEN_VALUE(name),
6203 },
6204 .number = pm_numbered_reference_read_node_number(parser, name)
6205 };
6206
6207 return node;
6208}
6209
6213static pm_optional_parameter_node_t *
6214pm_optional_parameter_node_create(pm_parser_t *parser, const pm_token_t *name, const pm_token_t *operator, pm_node_t *value) {
6215 pm_optional_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_optional_parameter_node_t);
6216
6217 *node = (pm_optional_parameter_node_t) {
6218 {
6219 .type = PM_OPTIONAL_PARAMETER_NODE,
6220 .node_id = PM_NODE_IDENTIFY(parser),
6221 .location = {
6222 .start = name->start,
6223 .end = value->location.end
6224 }
6225 },
6226 .name = pm_parser_constant_id_token(parser, name),
6227 .name_loc = PM_LOCATION_TOKEN_VALUE(name),
6228 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
6229 .value = value
6230 };
6231
6232 return node;
6233}
6234
6238static pm_or_node_t *
6239pm_or_node_create(pm_parser_t *parser, pm_node_t *left, const pm_token_t *operator, pm_node_t *right) {
6240 pm_assert_value_expression(parser, left);
6241
6242 pm_or_node_t *node = PM_NODE_ALLOC(parser, pm_or_node_t);
6243
6244 *node = (pm_or_node_t) {
6245 {
6246 .type = PM_OR_NODE,
6247 .node_id = PM_NODE_IDENTIFY(parser),
6248 .location = {
6249 .start = left->location.start,
6250 .end = right->location.end
6251 }
6252 },
6253 .left = left,
6254 .right = right,
6255 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
6256 };
6257
6258 return node;
6259}
6260
6264static pm_parameters_node_t *
6265pm_parameters_node_create(pm_parser_t *parser) {
6266 pm_parameters_node_t *node = PM_NODE_ALLOC(parser, pm_parameters_node_t);
6267
6268 *node = (pm_parameters_node_t) {
6269 {
6270 .type = PM_PARAMETERS_NODE,
6271 .node_id = PM_NODE_IDENTIFY(parser),
6272 .location = PM_LOCATION_TOKEN_VALUE(&parser->current)
6273 },
6274 .rest = NULL,
6275 .keyword_rest = NULL,
6276 .block = NULL,
6277 .requireds = { 0 },
6278 .optionals = { 0 },
6279 .posts = { 0 },
6280 .keywords = { 0 }
6281 };
6282
6283 return node;
6284}
6285
6289static void
6290pm_parameters_node_location_set(pm_parameters_node_t *params, pm_node_t *param) {
6291 if (params->base.location.start == NULL) {
6292 params->base.location.start = param->location.start;
6293 } else {
6294 params->base.location.start = params->base.location.start < param->location.start ? params->base.location.start : param->location.start;
6295 }
6296
6297 if (params->base.location.end == NULL) {
6298 params->base.location.end = param->location.end;
6299 } else {
6300 params->base.location.end = params->base.location.end > param->location.end ? params->base.location.end : param->location.end;
6301 }
6302}
6303
6307static void
6308pm_parameters_node_requireds_append(pm_parameters_node_t *params, pm_node_t *param) {
6309 pm_parameters_node_location_set(params, param);
6310 pm_node_list_append(&params->requireds, param);
6311}
6312
6316static void
6317pm_parameters_node_optionals_append(pm_parameters_node_t *params, pm_optional_parameter_node_t *param) {
6318 pm_parameters_node_location_set(params, (pm_node_t *) param);
6319 pm_node_list_append(&params->optionals, (pm_node_t *) param);
6320}
6321
6325static void
6326pm_parameters_node_posts_append(pm_parameters_node_t *params, pm_node_t *param) {
6327 pm_parameters_node_location_set(params, param);
6328 pm_node_list_append(&params->posts, param);
6329}
6330
6334static void
6335pm_parameters_node_rest_set(pm_parameters_node_t *params, pm_node_t *param) {
6336 pm_parameters_node_location_set(params, param);
6337 params->rest = param;
6338}
6339
6343static void
6344pm_parameters_node_keywords_append(pm_parameters_node_t *params, pm_node_t *param) {
6345 pm_parameters_node_location_set(params, param);
6346 pm_node_list_append(&params->keywords, param);
6347}
6348
6352static void
6353pm_parameters_node_keyword_rest_set(pm_parameters_node_t *params, pm_node_t *param) {
6354 assert(params->keyword_rest == NULL);
6355 pm_parameters_node_location_set(params, param);
6356 params->keyword_rest = param;
6357}
6358
6362static void
6363pm_parameters_node_block_set(pm_parameters_node_t *params, pm_block_parameter_node_t *param) {
6364 assert(params->block == NULL);
6365 pm_parameters_node_location_set(params, (pm_node_t *) param);
6366 params->block = param;
6367}
6368
6372static pm_program_node_t *
6373pm_program_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, pm_statements_node_t *statements) {
6374 pm_program_node_t *node = PM_NODE_ALLOC(parser, pm_program_node_t);
6375
6376 *node = (pm_program_node_t) {
6377 {
6378 .type = PM_PROGRAM_NODE,
6379 .node_id = PM_NODE_IDENTIFY(parser),
6380 .location = {
6381 .start = statements == NULL ? parser->start : statements->base.location.start,
6382 .end = statements == NULL ? parser->end : statements->base.location.end
6383 }
6384 },
6385 .locals = *locals,
6386 .statements = statements
6387 };
6388
6389 return node;
6390}
6391
6395static pm_parentheses_node_t *
6396pm_parentheses_node_create(pm_parser_t *parser, const pm_token_t *opening, pm_node_t *body, const pm_token_t *closing) {
6397 pm_parentheses_node_t *node = PM_NODE_ALLOC(parser, pm_parentheses_node_t);
6398
6399 *node = (pm_parentheses_node_t) {
6400 {
6401 .type = PM_PARENTHESES_NODE,
6402 .node_id = PM_NODE_IDENTIFY(parser),
6403 .location = {
6404 .start = opening->start,
6405 .end = closing->end
6406 }
6407 },
6408 .body = body,
6409 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
6410 .closing_loc = PM_LOCATION_TOKEN_VALUE(closing)
6411 };
6412
6413 return node;
6414}
6415
6419static pm_pinned_expression_node_t *
6420pm_pinned_expression_node_create(pm_parser_t *parser, pm_node_t *expression, const pm_token_t *operator, const pm_token_t *lparen, const pm_token_t *rparen) {
6421 pm_pinned_expression_node_t *node = PM_NODE_ALLOC(parser, pm_pinned_expression_node_t);
6422
6423 *node = (pm_pinned_expression_node_t) {
6424 {
6425 .type = PM_PINNED_EXPRESSION_NODE,
6426 .node_id = PM_NODE_IDENTIFY(parser),
6427 .location = {
6428 .start = operator->start,
6429 .end = rparen->end
6430 }
6431 },
6432 .expression = expression,
6433 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
6434 .lparen_loc = PM_LOCATION_TOKEN_VALUE(lparen),
6435 .rparen_loc = PM_LOCATION_TOKEN_VALUE(rparen)
6436 };
6437
6438 return node;
6439}
6440
6444static pm_pinned_variable_node_t *
6445pm_pinned_variable_node_create(pm_parser_t *parser, const pm_token_t *operator, pm_node_t *variable) {
6446 pm_pinned_variable_node_t *node = PM_NODE_ALLOC(parser, pm_pinned_variable_node_t);
6447
6448 *node = (pm_pinned_variable_node_t) {
6449 {
6450 .type = PM_PINNED_VARIABLE_NODE,
6451 .node_id = PM_NODE_IDENTIFY(parser),
6452 .location = {
6453 .start = operator->start,
6454 .end = variable->location.end
6455 }
6456 },
6457 .variable = variable,
6458 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
6459 };
6460
6461 return node;
6462}
6463
6467static pm_post_execution_node_t *
6468pm_post_execution_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_token_t *opening, pm_statements_node_t *statements, const pm_token_t *closing) {
6469 pm_post_execution_node_t *node = PM_NODE_ALLOC(parser, pm_post_execution_node_t);
6470
6471 *node = (pm_post_execution_node_t) {
6472 {
6473 .type = PM_POST_EXECUTION_NODE,
6474 .node_id = PM_NODE_IDENTIFY(parser),
6475 .location = {
6476 .start = keyword->start,
6477 .end = closing->end
6478 }
6479 },
6480 .statements = statements,
6481 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
6482 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
6483 .closing_loc = PM_LOCATION_TOKEN_VALUE(closing)
6484 };
6485
6486 return node;
6487}
6488
6492static pm_pre_execution_node_t *
6493pm_pre_execution_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_token_t *opening, pm_statements_node_t *statements, const pm_token_t *closing) {
6494 pm_pre_execution_node_t *node = PM_NODE_ALLOC(parser, pm_pre_execution_node_t);
6495
6496 *node = (pm_pre_execution_node_t) {
6497 {
6498 .type = PM_PRE_EXECUTION_NODE,
6499 .node_id = PM_NODE_IDENTIFY(parser),
6500 .location = {
6501 .start = keyword->start,
6502 .end = closing->end
6503 }
6504 },
6505 .statements = statements,
6506 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
6507 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
6508 .closing_loc = PM_LOCATION_TOKEN_VALUE(closing)
6509 };
6510
6511 return node;
6512}
6513
6517static pm_range_node_t *
6518pm_range_node_create(pm_parser_t *parser, pm_node_t *left, const pm_token_t *operator, pm_node_t *right) {
6519 pm_assert_value_expression(parser, left);
6520 pm_assert_value_expression(parser, right);
6521
6522 pm_range_node_t *node = PM_NODE_ALLOC(parser, pm_range_node_t);
6523 pm_node_flags_t flags = 0;
6524
6525 // Indicate that this node is an exclusive range if the operator is `...`.
6526 if (operator->type == PM_TOKEN_DOT_DOT_DOT || operator->type == PM_TOKEN_UDOT_DOT_DOT) {
6527 flags |= PM_RANGE_FLAGS_EXCLUDE_END;
6528 }
6529
6530 // Indicate that this node is a static literal (i.e., can be compiled with
6531 // a putobject in CRuby) if the left and right are implicit nil, explicit
6532 // nil, or integers.
6533 if (
6534 (left == NULL || PM_NODE_TYPE_P(left, PM_NIL_NODE) || PM_NODE_TYPE_P(left, PM_INTEGER_NODE)) &&
6535 (right == NULL || PM_NODE_TYPE_P(right, PM_NIL_NODE) || PM_NODE_TYPE_P(right, PM_INTEGER_NODE))
6536 ) {
6537 flags |= PM_NODE_FLAG_STATIC_LITERAL;
6538 }
6539
6540 *node = (pm_range_node_t) {
6541 {
6542 .type = PM_RANGE_NODE,
6543 .flags = flags,
6544 .node_id = PM_NODE_IDENTIFY(parser),
6545 .location = {
6546 .start = (left == NULL ? operator->start : left->location.start),
6547 .end = (right == NULL ? operator->end : right->location.end)
6548 }
6549 },
6550 .left = left,
6551 .right = right,
6552 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
6553 };
6554
6555 return node;
6556}
6557
6561static pm_redo_node_t *
6562pm_redo_node_create(pm_parser_t *parser, const pm_token_t *token) {
6563 assert(token->type == PM_TOKEN_KEYWORD_REDO);
6564 pm_redo_node_t *node = PM_NODE_ALLOC(parser, pm_redo_node_t);
6565
6566 *node = (pm_redo_node_t) {{
6567 .type = PM_REDO_NODE,
6568 .node_id = PM_NODE_IDENTIFY(parser),
6569 .location = PM_LOCATION_TOKEN_VALUE(token)
6570 }};
6571
6572 return node;
6573}
6574
6579static pm_regular_expression_node_t *
6580pm_regular_expression_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing, const pm_string_t *unescaped) {
6581 pm_regular_expression_node_t *node = PM_NODE_ALLOC(parser, pm_regular_expression_node_t);
6582
6583 *node = (pm_regular_expression_node_t) {
6584 {
6585 .type = PM_REGULAR_EXPRESSION_NODE,
6586 .flags = pm_regular_expression_flags_create(parser, closing) | PM_NODE_FLAG_STATIC_LITERAL,
6587 .node_id = PM_NODE_IDENTIFY(parser),
6588 .location = {
6589 .start = MIN(opening->start, closing->start),
6590 .end = MAX(opening->end, closing->end)
6591 }
6592 },
6593 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
6594 .content_loc = PM_LOCATION_TOKEN_VALUE(content),
6595 .closing_loc = PM_LOCATION_TOKEN_VALUE(closing),
6596 .unescaped = *unescaped
6597 };
6598
6599 return node;
6600}
6601
6605static inline pm_regular_expression_node_t *
6606pm_regular_expression_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing) {
6607 return pm_regular_expression_node_create_unescaped(parser, opening, content, closing, &PM_STRING_EMPTY);
6608}
6609
6613static pm_required_parameter_node_t *
6614pm_required_parameter_node_create(pm_parser_t *parser, const pm_token_t *token) {
6615 pm_required_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_required_parameter_node_t);
6616
6617 *node = (pm_required_parameter_node_t) {
6618 {
6619 .type = PM_REQUIRED_PARAMETER_NODE,
6620 .node_id = PM_NODE_IDENTIFY(parser),
6621 .location = PM_LOCATION_TOKEN_VALUE(token)
6622 },
6623 .name = pm_parser_constant_id_token(parser, token)
6624 };
6625
6626 return node;
6627}
6628
6632static pm_rescue_modifier_node_t *
6633pm_rescue_modifier_node_create(pm_parser_t *parser, pm_node_t *expression, const pm_token_t *keyword, pm_node_t *rescue_expression) {
6634 pm_rescue_modifier_node_t *node = PM_NODE_ALLOC(parser, pm_rescue_modifier_node_t);
6635
6636 *node = (pm_rescue_modifier_node_t) {
6637 {
6638 .type = PM_RESCUE_MODIFIER_NODE,
6639 .node_id = PM_NODE_IDENTIFY(parser),
6640 .location = {
6641 .start = expression->location.start,
6642 .end = rescue_expression->location.end
6643 }
6644 },
6645 .expression = expression,
6646 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
6647 .rescue_expression = rescue_expression
6648 };
6649
6650 return node;
6651}
6652
6656static pm_rescue_node_t *
6657pm_rescue_node_create(pm_parser_t *parser, const pm_token_t *keyword) {
6658 pm_rescue_node_t *node = PM_NODE_ALLOC(parser, pm_rescue_node_t);
6659
6660 *node = (pm_rescue_node_t) {
6661 {
6662 .type = PM_RESCUE_NODE,
6663 .node_id = PM_NODE_IDENTIFY(parser),
6664 .location = PM_LOCATION_TOKEN_VALUE(keyword)
6665 },
6666 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
6667 .operator_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
6668 .reference = NULL,
6669 .statements = NULL,
6670 .subsequent = NULL,
6671 .exceptions = { 0 }
6672 };
6673
6674 return node;
6675}
6676
6677static inline void
6678pm_rescue_node_operator_set(pm_rescue_node_t *node, const pm_token_t *operator) {
6679 node->operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator);
6680}
6681
6685static void
6686pm_rescue_node_reference_set(pm_rescue_node_t *node, pm_node_t *reference) {
6687 node->reference = reference;
6688 node->base.location.end = reference->location.end;
6689}
6690
6694static void
6695pm_rescue_node_statements_set(pm_rescue_node_t *node, pm_statements_node_t *statements) {
6696 node->statements = statements;
6697 if (pm_statements_node_body_length(statements) > 0) {
6698 node->base.location.end = statements->base.location.end;
6699 }
6700}
6701
6705static void
6706pm_rescue_node_subsequent_set(pm_rescue_node_t *node, pm_rescue_node_t *subsequent) {
6707 node->subsequent = subsequent;
6708 node->base.location.end = subsequent->base.location.end;
6709}
6710
6714static void
6715pm_rescue_node_exceptions_append(pm_rescue_node_t *node, pm_node_t *exception) {
6716 pm_node_list_append(&node->exceptions, exception);
6717 node->base.location.end = exception->location.end;
6718}
6719
6723static pm_rest_parameter_node_t *
6724pm_rest_parameter_node_create(pm_parser_t *parser, const pm_token_t *operator, const pm_token_t *name) {
6725 pm_rest_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_rest_parameter_node_t);
6726
6727 *node = (pm_rest_parameter_node_t) {
6728 {
6729 .type = PM_REST_PARAMETER_NODE,
6730 .node_id = PM_NODE_IDENTIFY(parser),
6731 .location = {
6732 .start = operator->start,
6733 .end = (name->type == PM_TOKEN_NOT_PROVIDED ? operator->end : name->end)
6734 }
6735 },
6736 .name = pm_parser_optional_constant_id_token(parser, name),
6737 .name_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(name),
6738 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
6739 };
6740
6741 return node;
6742}
6743
6747static pm_retry_node_t *
6748pm_retry_node_create(pm_parser_t *parser, const pm_token_t *token) {
6749 assert(token->type == PM_TOKEN_KEYWORD_RETRY);
6750 pm_retry_node_t *node = PM_NODE_ALLOC(parser, pm_retry_node_t);
6751
6752 *node = (pm_retry_node_t) {{
6753 .type = PM_RETRY_NODE,
6754 .node_id = PM_NODE_IDENTIFY(parser),
6755 .location = PM_LOCATION_TOKEN_VALUE(token)
6756 }};
6757
6758 return node;
6759}
6760
6764static pm_return_node_t *
6765pm_return_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_arguments_node_t *arguments) {
6766 pm_return_node_t *node = PM_NODE_ALLOC(parser, pm_return_node_t);
6767
6768 *node = (pm_return_node_t) {
6769 {
6770 .type = PM_RETURN_NODE,
6771 .node_id = PM_NODE_IDENTIFY(parser),
6772 .location = {
6773 .start = keyword->start,
6774 .end = (arguments == NULL ? keyword->end : arguments->base.location.end)
6775 }
6776 },
6777 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
6778 .arguments = arguments
6779 };
6780
6781 return node;
6782}
6783
6787static pm_self_node_t *
6788pm_self_node_create(pm_parser_t *parser, const pm_token_t *token) {
6789 assert(token->type == PM_TOKEN_KEYWORD_SELF);
6790 pm_self_node_t *node = PM_NODE_ALLOC(parser, pm_self_node_t);
6791
6792 *node = (pm_self_node_t) {{
6793 .type = PM_SELF_NODE,
6794 .node_id = PM_NODE_IDENTIFY(parser),
6795 .location = PM_LOCATION_TOKEN_VALUE(token)
6796 }};
6797
6798 return node;
6799}
6800
6804static pm_shareable_constant_node_t *
6805pm_shareable_constant_node_create(pm_parser_t *parser, pm_node_t *write, pm_shareable_constant_value_t value) {
6806 pm_shareable_constant_node_t *node = PM_NODE_ALLOC(parser, pm_shareable_constant_node_t);
6807
6808 *node = (pm_shareable_constant_node_t) {
6809 {
6810 .type = PM_SHAREABLE_CONSTANT_NODE,
6811 .flags = (pm_node_flags_t) value,
6812 .node_id = PM_NODE_IDENTIFY(parser),
6813 .location = PM_LOCATION_NODE_VALUE(write)
6814 },
6815 .write = write
6816 };
6817
6818 return node;
6819}
6820
6824static pm_singleton_class_node_t *
6825pm_singleton_class_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, const pm_token_t *class_keyword, const pm_token_t *operator, pm_node_t *expression, pm_node_t *body, const pm_token_t *end_keyword) {
6826 pm_singleton_class_node_t *node = PM_NODE_ALLOC(parser, pm_singleton_class_node_t);
6827
6828 *node = (pm_singleton_class_node_t) {
6829 {
6830 .type = PM_SINGLETON_CLASS_NODE,
6831 .node_id = PM_NODE_IDENTIFY(parser),
6832 .location = {
6833 .start = class_keyword->start,
6834 .end = end_keyword->end
6835 }
6836 },
6837 .locals = *locals,
6838 .class_keyword_loc = PM_LOCATION_TOKEN_VALUE(class_keyword),
6839 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
6840 .expression = expression,
6841 .body = body,
6842 .end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword)
6843 };
6844
6845 return node;
6846}
6847
6851static pm_source_encoding_node_t *
6852pm_source_encoding_node_create(pm_parser_t *parser, const pm_token_t *token) {
6853 assert(token->type == PM_TOKEN_KEYWORD___ENCODING__);
6854 pm_source_encoding_node_t *node = PM_NODE_ALLOC(parser, pm_source_encoding_node_t);
6855
6856 *node = (pm_source_encoding_node_t) {{
6857 .type = PM_SOURCE_ENCODING_NODE,
6858 .flags = PM_NODE_FLAG_STATIC_LITERAL,
6859 .node_id = PM_NODE_IDENTIFY(parser),
6860 .location = PM_LOCATION_TOKEN_VALUE(token)
6861 }};
6862
6863 return node;
6864}
6865
6869static pm_source_file_node_t*
6870pm_source_file_node_create(pm_parser_t *parser, const pm_token_t *file_keyword) {
6871 pm_source_file_node_t *node = PM_NODE_ALLOC(parser, pm_source_file_node_t);
6872 assert(file_keyword->type == PM_TOKEN_KEYWORD___FILE__);
6873
6874 pm_node_flags_t flags = 0;
6875
6876 switch (parser->frozen_string_literal) {
6877 case PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED:
6878 flags |= PM_STRING_FLAGS_MUTABLE;
6879 break;
6880 case PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED:
6881 flags |= PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN;
6882 break;
6883 }
6884
6885 *node = (pm_source_file_node_t) {
6886 {
6887 .type = PM_SOURCE_FILE_NODE,
6888 .flags = flags,
6889 .node_id = PM_NODE_IDENTIFY(parser),
6890 .location = PM_LOCATION_TOKEN_VALUE(file_keyword),
6891 },
6892 .filepath = parser->filepath
6893 };
6894
6895 return node;
6896}
6897
6901static pm_source_line_node_t *
6902pm_source_line_node_create(pm_parser_t *parser, const pm_token_t *token) {
6903 assert(token->type == PM_TOKEN_KEYWORD___LINE__);
6904 pm_source_line_node_t *node = PM_NODE_ALLOC(parser, pm_source_line_node_t);
6905
6906 *node = (pm_source_line_node_t) {{
6907 .type = PM_SOURCE_LINE_NODE,
6908 .flags = PM_NODE_FLAG_STATIC_LITERAL,
6909 .node_id = PM_NODE_IDENTIFY(parser),
6910 .location = PM_LOCATION_TOKEN_VALUE(token)
6911 }};
6912
6913 return node;
6914}
6915
6919static pm_splat_node_t *
6920pm_splat_node_create(pm_parser_t *parser, const pm_token_t *operator, pm_node_t *expression) {
6921 pm_splat_node_t *node = PM_NODE_ALLOC(parser, pm_splat_node_t);
6922
6923 *node = (pm_splat_node_t) {
6924 {
6925 .type = PM_SPLAT_NODE,
6926 .node_id = PM_NODE_IDENTIFY(parser),
6927 .location = {
6928 .start = operator->start,
6929 .end = (expression == NULL ? operator->end : expression->location.end)
6930 }
6931 },
6932 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
6933 .expression = expression
6934 };
6935
6936 return node;
6937}
6938
6942static pm_statements_node_t *
6943pm_statements_node_create(pm_parser_t *parser) {
6944 pm_statements_node_t *node = PM_NODE_ALLOC(parser, pm_statements_node_t);
6945
6946 *node = (pm_statements_node_t) {
6947 {
6948 .type = PM_STATEMENTS_NODE,
6949 .node_id = PM_NODE_IDENTIFY(parser),
6950 .location = PM_LOCATION_NULL_VALUE(parser)
6951 },
6952 .body = { 0 }
6953 };
6954
6955 return node;
6956}
6957
6961static size_t
6962pm_statements_node_body_length(pm_statements_node_t *node) {
6963 return node && node->body.size;
6964}
6965
6969static void
6970pm_statements_node_location_set(pm_statements_node_t *node, const uint8_t *start, const uint8_t *end) {
6971 node->base.location = (pm_location_t) { .start = start, .end = end };
6972}
6973
6978static inline void
6979pm_statements_node_body_update(pm_statements_node_t *node, pm_node_t *statement) {
6980 if (pm_statements_node_body_length(node) == 0 || statement->location.start < node->base.location.start) {
6981 node->base.location.start = statement->location.start;
6982 }
6983
6984 if (statement->location.end > node->base.location.end) {
6985 node->base.location.end = statement->location.end;
6986 }
6987}
6988
6992static void
6993pm_statements_node_body_append(pm_parser_t *parser, pm_statements_node_t *node, pm_node_t *statement, bool newline) {
6994 pm_statements_node_body_update(node, statement);
6995
6996 if (node->body.size > 0) {
6997 const pm_node_t *previous = node->body.nodes[node->body.size - 1];
6998
6999 switch (PM_NODE_TYPE(previous)) {
7000 case PM_BREAK_NODE:
7001 case PM_NEXT_NODE:
7002 case PM_REDO_NODE:
7003 case PM_RETRY_NODE:
7004 case PM_RETURN_NODE:
7005 pm_parser_warn_node(parser, statement, PM_WARN_UNREACHABLE_STATEMENT);
7006 break;
7007 default:
7008 break;
7009 }
7010 }
7011
7012 pm_node_list_append(&node->body, statement);
7013 if (newline) pm_node_flag_set(statement, PM_NODE_FLAG_NEWLINE);
7014}
7015
7019static void
7020pm_statements_node_body_prepend(pm_statements_node_t *node, pm_node_t *statement) {
7021 pm_statements_node_body_update(node, statement);
7022 pm_node_list_prepend(&node->body, statement);
7023 pm_node_flag_set(statement, PM_NODE_FLAG_NEWLINE);
7024}
7025
7029static inline pm_string_node_t *
7030pm_string_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing, const pm_string_t *string) {
7031 pm_string_node_t *node = PM_NODE_ALLOC(parser, pm_string_node_t);
7032 pm_node_flags_t flags = 0;
7033
7034 switch (parser->frozen_string_literal) {
7035 case PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED:
7036 flags = PM_STRING_FLAGS_MUTABLE;
7037 break;
7038 case PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED:
7039 flags = PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN;
7040 break;
7041 }
7042
7043 *node = (pm_string_node_t) {
7044 {
7045 .type = PM_STRING_NODE,
7046 .flags = flags,
7047 .node_id = PM_NODE_IDENTIFY(parser),
7048 .location = {
7049 .start = (opening->type == PM_TOKEN_NOT_PROVIDED ? content->start : opening->start),
7050 .end = (closing->type == PM_TOKEN_NOT_PROVIDED ? content->end : closing->end)
7051 }
7052 },
7053 .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
7054 .content_loc = PM_LOCATION_TOKEN_VALUE(content),
7055 .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
7056 .unescaped = *string
7057 };
7058
7059 return node;
7060}
7061
7065static pm_string_node_t *
7066pm_string_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing) {
7067 return pm_string_node_create_unescaped(parser, opening, content, closing, &PM_STRING_EMPTY);
7068}
7069
7074static pm_string_node_t *
7075pm_string_node_create_current_string(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing) {
7076 pm_string_node_t *node = pm_string_node_create_unescaped(parser, opening, content, closing, &parser->current_string);
7077 parser->current_string = PM_STRING_EMPTY;
7078 return node;
7079}
7080
7084static pm_super_node_t *
7085pm_super_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_arguments_t *arguments) {
7086 assert(keyword->type == PM_TOKEN_KEYWORD_SUPER);
7087 pm_super_node_t *node = PM_NODE_ALLOC(parser, pm_super_node_t);
7088
7089 const uint8_t *end = pm_arguments_end(arguments);
7090 if (end == NULL) {
7091 assert(false && "unreachable");
7092 }
7093
7094 *node = (pm_super_node_t) {
7095 {
7096 .type = PM_SUPER_NODE,
7097 .node_id = PM_NODE_IDENTIFY(parser),
7098 .location = {
7099 .start = keyword->start,
7100 .end = end,
7101 }
7102 },
7103 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
7104 .lparen_loc = arguments->opening_loc,
7105 .arguments = arguments->arguments,
7106 .rparen_loc = arguments->closing_loc,
7107 .block = arguments->block
7108 };
7109
7110 return node;
7111}
7112
7117static bool
7118pm_ascii_only_p(const pm_string_t *contents) {
7119 const size_t length = pm_string_length(contents);
7120 const uint8_t *source = pm_string_source(contents);
7121
7122 for (size_t index = 0; index < length; index++) {
7123 if (source[index] & 0x80) return false;
7124 }
7125
7126 return true;
7127}
7128
7132static void
7133parse_symbol_encoding_validate_utf8(pm_parser_t *parser, const pm_token_t *location, const pm_string_t *contents) {
7134 for (const uint8_t *cursor = pm_string_source(contents), *end = cursor + pm_string_length(contents); cursor < end;) {
7135 size_t width = pm_encoding_utf_8_char_width(cursor, end - cursor);
7136
7137 if (width == 0) {
7138 pm_parser_err(parser, location->start, location->end, PM_ERR_INVALID_SYMBOL);
7139 break;
7140 }
7141
7142 cursor += width;
7143 }
7144}
7145
7150static void
7151parse_symbol_encoding_validate_other(pm_parser_t *parser, const pm_token_t *location, const pm_string_t *contents) {
7152 const pm_encoding_t *encoding = parser->encoding;
7153
7154 for (const uint8_t *cursor = pm_string_source(contents), *end = cursor + pm_string_length(contents); cursor < end;) {
7155 size_t width = encoding->char_width(cursor, end - cursor);
7156
7157 if (width == 0) {
7158 pm_parser_err(parser, location->start, location->end, PM_ERR_INVALID_SYMBOL);
7159 break;
7160 }
7161
7162 cursor += width;
7163 }
7164}
7165
7175static inline pm_node_flags_t
7176parse_symbol_encoding(pm_parser_t *parser, const pm_token_t *location, const pm_string_t *contents, bool validate) {
7177 if (parser->explicit_encoding != NULL) {
7178 // A Symbol may optionally have its encoding explicitly set. This will
7179 // happen if an escape sequence results in a non-ASCII code point.
7180 if (parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY) {
7181 if (validate) parse_symbol_encoding_validate_utf8(parser, location, contents);
7182 return PM_SYMBOL_FLAGS_FORCED_UTF8_ENCODING;
7183 } else if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) {
7184 return PM_SYMBOL_FLAGS_FORCED_BINARY_ENCODING;
7185 } else if (validate) {
7186 parse_symbol_encoding_validate_other(parser, location, contents);
7187 }
7188 } else if (pm_ascii_only_p(contents)) {
7189 // Ruby stipulates that all source files must use an ASCII-compatible
7190 // encoding. Thus, all symbols appearing in source are eligible for
7191 // "downgrading" to US-ASCII.
7192 return PM_SYMBOL_FLAGS_FORCED_US_ASCII_ENCODING;
7193 } else if (validate) {
7194 parse_symbol_encoding_validate_other(parser, location, contents);
7195 }
7196
7197 return 0;
7198}
7199
7200static pm_node_flags_t
7201parse_and_validate_regular_expression_encoding_modifier(pm_parser_t *parser, const pm_string_t *source, bool ascii_only, pm_node_flags_t flags, char modifier, const pm_encoding_t *modifier_encoding) {
7202 assert ((modifier == 'n' && modifier_encoding == PM_ENCODING_ASCII_8BIT_ENTRY) ||
7203 (modifier == 'u' && modifier_encoding == PM_ENCODING_UTF_8_ENTRY) ||
7204 (modifier == 'e' && modifier_encoding == PM_ENCODING_EUC_JP_ENTRY) ||
7205 (modifier == 's' && modifier_encoding == PM_ENCODING_WINDOWS_31J_ENTRY));
7206
7207 // There's special validation logic used if a string does not contain any character escape sequences.
7208 if (parser->explicit_encoding == NULL) {
7209 // If an ASCII-only string without character escapes is used with an encoding modifier, then resulting Regexp
7210 // has the modifier encoding, unless the ASCII-8BIT modifier is used, in which case the Regexp "downgrades" to
7211 // the US-ASCII encoding.
7212 if (ascii_only) {
7213 return modifier == 'n' ? PM_REGULAR_EXPRESSION_FLAGS_FORCED_US_ASCII_ENCODING : flags;
7214 }
7215
7216 if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) {
7217 if (!ascii_only) {
7218 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_MULTIBYTE_CHAR, parser->encoding->name);
7219 }
7220 } else if (parser->encoding != modifier_encoding) {
7221 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_REGEXP_ENCODING_OPTION_MISMATCH, modifier, parser->encoding->name);
7222
7223 if (modifier == 'n' && !ascii_only) {
7224 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_REGEXP_NON_ESCAPED_MBC, (int) pm_string_length(source), (const char *) pm_string_source(source));
7225 }
7226 }
7227
7228 return flags;
7229 }
7230
7231 // TODO (nirvdrum 21-Feb-2024): To validate regexp sources with character escape sequences we need to know whether hex or Unicode escape sequences were used and Prism doesn't currently provide that data. We handle a subset of unambiguous cases in the meanwhile.
7232 bool mixed_encoding = false;
7233
7234 if (mixed_encoding) {
7235 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_MULTIBYTE_ESCAPE, (int) pm_string_length(source), (const char *) pm_string_source(source));
7236 } else if (modifier != 'n' && parser->explicit_encoding == PM_ENCODING_ASCII_8BIT_ENTRY) {
7237 // TODO (nirvdrum 21-Feb-2024): Validate the content is valid in the modifier encoding. Do this on-demand so we don't pay the cost of computation unnecessarily.
7238 bool valid_string_in_modifier_encoding = true;
7239
7240 if (!valid_string_in_modifier_encoding) {
7241 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_MULTIBYTE_ESCAPE, (int) pm_string_length(source), (const char *) pm_string_source(source));
7242 }
7243 } else if (modifier != 'u' && parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY) {
7244 // TODO (nirvdrum 21-Feb-2024): There's currently no way to tell if the source used hex or Unicode character escapes from `explicit_encoding` alone. If the source encoding was already UTF-8, both character escape types would set `explicit_encoding` to UTF-8, but need to be processed differently. Skip for now.
7245 if (parser->encoding != PM_ENCODING_UTF_8_ENTRY) {
7246 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_REGEXP_INCOMPAT_CHAR_ENCODING, (int) pm_string_length(source), (const char *) pm_string_source(source));
7247 }
7248 }
7249
7250 // We've determined the encoding would naturally be EUC-JP and there is no need to force the encoding to anything else.
7251 return flags;
7252}
7253
7260static pm_node_flags_t
7261parse_and_validate_regular_expression_encoding(pm_parser_t *parser, const pm_string_t *source, bool ascii_only, pm_node_flags_t flags) {
7262 // TODO (nirvdrum 22-Feb-2024): CRuby reports a special Regexp-specific error for invalid Unicode ranges. We either need to scan again or modify the "invalid Unicode escape sequence" message we already report.
7263 bool valid_unicode_range = true;
7264 if (parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY && !valid_unicode_range) {
7265 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_REGEXP_INVALID_UNICODE_RANGE, (int) pm_string_length(source), (const char *) pm_string_source(source));
7266 return flags;
7267 }
7268
7269 // US-ASCII strings do not admit multi-byte character literals. However, character escape sequences corresponding
7270 // to multi-byte characters are allowed.
7271 if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY && parser->explicit_encoding == NULL && !ascii_only) {
7272 // CRuby will continue processing even though a SyntaxError has already been detected. It may result in the
7273 // following error message appearing twice. We do the same for compatibility.
7274 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_MULTIBYTE_CHAR, parser->encoding->name);
7275 }
7276
7284
7285 if (flags & PM_REGULAR_EXPRESSION_FLAGS_ASCII_8BIT) {
7286 return parse_and_validate_regular_expression_encoding_modifier(parser, source, ascii_only, flags, 'n', PM_ENCODING_ASCII_8BIT_ENTRY);
7287 }
7288
7289 if (flags & PM_REGULAR_EXPRESSION_FLAGS_UTF_8) {
7290 return parse_and_validate_regular_expression_encoding_modifier(parser, source, ascii_only, flags, 'u', PM_ENCODING_UTF_8_ENTRY);
7291 }
7292
7293 if (flags & PM_REGULAR_EXPRESSION_FLAGS_EUC_JP) {
7294 return parse_and_validate_regular_expression_encoding_modifier(parser, source, ascii_only, flags, 'e', PM_ENCODING_EUC_JP_ENTRY);
7295 }
7296
7297 if (flags & PM_REGULAR_EXPRESSION_FLAGS_WINDOWS_31J) {
7298 return parse_and_validate_regular_expression_encoding_modifier(parser, source, ascii_only, flags, 's', PM_ENCODING_WINDOWS_31J_ENTRY);
7299 }
7300
7301 // At this point no encoding modifiers will be present on the regular expression as they would have already
7302 // been processed. Ruby stipulates that all source files must use an ASCII-compatible encoding. Thus, all
7303 // regular expressions without an encoding modifier appearing in source are eligible for "downgrading" to US-ASCII.
7304 if (ascii_only) {
7305 return PM_REGULAR_EXPRESSION_FLAGS_FORCED_US_ASCII_ENCODING;
7306 }
7307
7308 // A Regexp may optionally have its encoding explicitly set via a character escape sequence in the source string
7309 // or by specifying a modifier.
7310 //
7311 // NB: an explicitly set encoding is ignored by Ruby if the Regexp consists of only US ASCII code points.
7312 if (parser->explicit_encoding != NULL) {
7313 if (parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY) {
7314 return PM_REGULAR_EXPRESSION_FLAGS_FORCED_UTF8_ENCODING;
7315 } else if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) {
7316 return PM_REGULAR_EXPRESSION_FLAGS_FORCED_BINARY_ENCODING;
7317 }
7318 }
7319
7320 return 0;
7321}
7322
7327static pm_symbol_node_t *
7328pm_symbol_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *value, const pm_token_t *closing, const pm_string_t *unescaped, pm_node_flags_t flags) {
7329 pm_symbol_node_t *node = PM_NODE_ALLOC(parser, pm_symbol_node_t);
7330
7331 *node = (pm_symbol_node_t) {
7332 {
7333 .type = PM_SYMBOL_NODE,
7334 .flags = PM_NODE_FLAG_STATIC_LITERAL | flags,
7335 .node_id = PM_NODE_IDENTIFY(parser),
7336 .location = {
7337 .start = (opening->type == PM_TOKEN_NOT_PROVIDED ? value->start : opening->start),
7338 .end = (closing->type == PM_TOKEN_NOT_PROVIDED ? value->end : closing->end)
7339 }
7340 },
7341 .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
7342 .value_loc = PM_LOCATION_TOKEN_VALUE(value),
7343 .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
7344 .unescaped = *unescaped
7345 };
7346
7347 return node;
7348}
7349
7353static inline pm_symbol_node_t *
7354pm_symbol_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *value, const pm_token_t *closing) {
7355 return pm_symbol_node_create_unescaped(parser, opening, value, closing, &PM_STRING_EMPTY, 0);
7356}
7357
7361static pm_symbol_node_t *
7362pm_symbol_node_create_current_string(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *value, const pm_token_t *closing) {
7363 pm_symbol_node_t *node = pm_symbol_node_create_unescaped(parser, opening, value, closing, &parser->current_string, parse_symbol_encoding(parser, value, &parser->current_string, false));
7364 parser->current_string = PM_STRING_EMPTY;
7365 return node;
7366}
7367
7371static pm_symbol_node_t *
7372pm_symbol_node_label_create(pm_parser_t *parser, const pm_token_t *token) {
7373 pm_symbol_node_t *node;
7374
7375 switch (token->type) {
7376 case PM_TOKEN_LABEL: {
7377 pm_token_t opening = not_provided(parser);
7378 pm_token_t closing = { .type = PM_TOKEN_LABEL_END, .start = token->end - 1, .end = token->end };
7379
7380 pm_token_t label = { .type = PM_TOKEN_LABEL, .start = token->start, .end = token->end - 1 };
7381 node = pm_symbol_node_create(parser, &opening, &label, &closing);
7382
7383 assert((label.end - label.start) >= 0);
7384 pm_string_shared_init(&node->unescaped, label.start, label.end);
7385 pm_node_flag_set((pm_node_t *) node, parse_symbol_encoding(parser, &label, &node->unescaped, false));
7386
7387 break;
7388 }
7389 case PM_TOKEN_MISSING: {
7390 pm_token_t opening = not_provided(parser);
7391 pm_token_t closing = not_provided(parser);
7392
7393 pm_token_t label = { .type = PM_TOKEN_LABEL, .start = token->start, .end = token->end };
7394 node = pm_symbol_node_create(parser, &opening, &label, &closing);
7395 break;
7396 }
7397 default:
7398 assert(false && "unreachable");
7399 node = NULL;
7400 break;
7401 }
7402
7403 return node;
7404}
7405
7409static pm_symbol_node_t *
7410pm_symbol_node_synthesized_create(pm_parser_t *parser, const char *content) {
7411 pm_symbol_node_t *node = PM_NODE_ALLOC(parser, pm_symbol_node_t);
7412
7413 *node = (pm_symbol_node_t) {
7414 {
7415 .type = PM_SYMBOL_NODE,
7416 .flags = PM_NODE_FLAG_STATIC_LITERAL | PM_SYMBOL_FLAGS_FORCED_US_ASCII_ENCODING,
7417 .node_id = PM_NODE_IDENTIFY(parser),
7418 .location = PM_LOCATION_NULL_VALUE(parser)
7419 },
7420 .value_loc = PM_LOCATION_NULL_VALUE(parser),
7421 .unescaped = { 0 }
7422 };
7423
7424 pm_string_constant_init(&node->unescaped, content, strlen(content));
7425 return node;
7426}
7427
7431static bool
7432pm_symbol_node_label_p(pm_node_t *node) {
7433 const uint8_t *end = NULL;
7434
7435 switch (PM_NODE_TYPE(node)) {
7436 case PM_SYMBOL_NODE:
7437 end = ((pm_symbol_node_t *) node)->closing_loc.end;
7438 break;
7439 case PM_INTERPOLATED_SYMBOL_NODE:
7440 end = ((pm_interpolated_symbol_node_t *) node)->closing_loc.end;
7441 break;
7442 default:
7443 return false;
7444 }
7445
7446 return (end != NULL) && (end[-1] == ':');
7447}
7448
7452static pm_symbol_node_t *
7453pm_string_node_to_symbol_node(pm_parser_t *parser, pm_string_node_t *node, const pm_token_t *opening, const pm_token_t *closing) {
7454 pm_symbol_node_t *new_node = PM_NODE_ALLOC(parser, pm_symbol_node_t);
7455
7456 *new_node = (pm_symbol_node_t) {
7457 {
7458 .type = PM_SYMBOL_NODE,
7459 .flags = PM_NODE_FLAG_STATIC_LITERAL,
7460 .node_id = PM_NODE_IDENTIFY(parser),
7461 .location = {
7462 .start = opening->start,
7463 .end = closing->end
7464 }
7465 },
7466 .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
7467 .value_loc = node->content_loc,
7468 .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
7469 .unescaped = node->unescaped
7470 };
7471
7472 pm_token_t content = { .type = PM_TOKEN_IDENTIFIER, .start = node->content_loc.start, .end = node->content_loc.end };
7473 pm_node_flag_set((pm_node_t *) new_node, parse_symbol_encoding(parser, &content, &node->unescaped, true));
7474
7475 // We are explicitly _not_ using pm_node_destroy here because we don't want
7476 // to trash the unescaped string. We could instead copy the string if we
7477 // know that it is owned, but we're taking the fast path for now.
7478 xfree(node);
7479
7480 return new_node;
7481}
7482
7486static pm_string_node_t *
7487pm_symbol_node_to_string_node(pm_parser_t *parser, pm_symbol_node_t *node) {
7488 pm_string_node_t *new_node = PM_NODE_ALLOC(parser, pm_string_node_t);
7489 pm_node_flags_t flags = 0;
7490
7491 switch (parser->frozen_string_literal) {
7492 case PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED:
7493 flags = PM_STRING_FLAGS_MUTABLE;
7494 break;
7495 case PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED:
7496 flags = PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN;
7497 break;
7498 }
7499
7500 *new_node = (pm_string_node_t) {
7501 {
7502 .type = PM_STRING_NODE,
7503 .flags = flags,
7504 .node_id = PM_NODE_IDENTIFY(parser),
7505 .location = node->base.location
7506 },
7507 .opening_loc = node->opening_loc,
7508 .content_loc = node->value_loc,
7509 .closing_loc = node->closing_loc,
7510 .unescaped = node->unescaped
7511 };
7512
7513 // We are explicitly _not_ using pm_node_destroy here because we don't want
7514 // to trash the unescaped string. We could instead copy the string if we
7515 // know that it is owned, but we're taking the fast path for now.
7516 xfree(node);
7517
7518 return new_node;
7519}
7520
7524static pm_true_node_t *
7525pm_true_node_create(pm_parser_t *parser, const pm_token_t *token) {
7526 assert(token->type == PM_TOKEN_KEYWORD_TRUE);
7527 pm_true_node_t *node = PM_NODE_ALLOC(parser, pm_true_node_t);
7528
7529 *node = (pm_true_node_t) {{
7530 .type = PM_TRUE_NODE,
7531 .flags = PM_NODE_FLAG_STATIC_LITERAL,
7532 .node_id = PM_NODE_IDENTIFY(parser),
7533 .location = PM_LOCATION_TOKEN_VALUE(token)
7534 }};
7535
7536 return node;
7537}
7538
7542static pm_true_node_t *
7543pm_true_node_synthesized_create(pm_parser_t *parser) {
7544 pm_true_node_t *node = PM_NODE_ALLOC(parser, pm_true_node_t);
7545
7546 *node = (pm_true_node_t) {{
7547 .type = PM_TRUE_NODE,
7548 .flags = PM_NODE_FLAG_STATIC_LITERAL,
7549 .node_id = PM_NODE_IDENTIFY(parser),
7550 .location = { .start = parser->start, .end = parser->end }
7551 }};
7552
7553 return node;
7554}
7555
7559static pm_undef_node_t *
7560pm_undef_node_create(pm_parser_t *parser, const pm_token_t *token) {
7561 assert(token->type == PM_TOKEN_KEYWORD_UNDEF);
7562 pm_undef_node_t *node = PM_NODE_ALLOC(parser, pm_undef_node_t);
7563
7564 *node = (pm_undef_node_t) {
7565 {
7566 .type = PM_UNDEF_NODE,
7567 .node_id = PM_NODE_IDENTIFY(parser),
7568 .location = PM_LOCATION_TOKEN_VALUE(token),
7569 },
7570 .keyword_loc = PM_LOCATION_TOKEN_VALUE(token),
7571 .names = { 0 }
7572 };
7573
7574 return node;
7575}
7576
7580static void
7581pm_undef_node_append(pm_undef_node_t *node, pm_node_t *name) {
7582 node->base.location.end = name->location.end;
7583 pm_node_list_append(&node->names, name);
7584}
7585
7589static pm_unless_node_t *
7590pm_unless_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_node_t *predicate, const pm_token_t *then_keyword, pm_statements_node_t *statements) {
7591 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
7592 pm_unless_node_t *node = PM_NODE_ALLOC(parser, pm_unless_node_t);
7593
7594 const uint8_t *end;
7595 if (statements != NULL) {
7596 end = statements->base.location.end;
7597 } else {
7598 end = predicate->location.end;
7599 }
7600
7601 *node = (pm_unless_node_t) {
7602 {
7603 .type = PM_UNLESS_NODE,
7604 .flags = PM_NODE_FLAG_NEWLINE,
7605 .node_id = PM_NODE_IDENTIFY(parser),
7606 .location = {
7607 .start = keyword->start,
7608 .end = end
7609 },
7610 },
7611 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
7612 .predicate = predicate,
7613 .then_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(then_keyword),
7614 .statements = statements,
7615 .else_clause = NULL,
7616 .end_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
7617 };
7618
7619 return node;
7620}
7621
7625static pm_unless_node_t *
7626pm_unless_node_modifier_create(pm_parser_t *parser, pm_node_t *statement, const pm_token_t *unless_keyword, pm_node_t *predicate) {
7627 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
7628 pm_unless_node_t *node = PM_NODE_ALLOC(parser, pm_unless_node_t);
7629
7630 pm_statements_node_t *statements = pm_statements_node_create(parser);
7631 pm_statements_node_body_append(parser, statements, statement, true);
7632
7633 *node = (pm_unless_node_t) {
7634 {
7635 .type = PM_UNLESS_NODE,
7636 .flags = PM_NODE_FLAG_NEWLINE,
7637 .node_id = PM_NODE_IDENTIFY(parser),
7638 .location = {
7639 .start = statement->location.start,
7640 .end = predicate->location.end
7641 },
7642 },
7643 .keyword_loc = PM_LOCATION_TOKEN_VALUE(unless_keyword),
7644 .predicate = predicate,
7645 .then_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
7646 .statements = statements,
7647 .else_clause = NULL,
7648 .end_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
7649 };
7650
7651 return node;
7652}
7653
7654static inline void
7655pm_unless_node_end_keyword_loc_set(pm_unless_node_t *node, const pm_token_t *end_keyword) {
7656 node->end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword);
7657 node->base.location.end = end_keyword->end;
7658}
7659
7665static void
7666pm_loop_modifier_block_exits(pm_parser_t *parser, pm_statements_node_t *statements) {
7667 assert(parser->current_block_exits != NULL);
7668
7669 // All of the block exits that we want to remove should be within the
7670 // statements, and since we are modifying the statements, we shouldn't have
7671 // to check the end location.
7672 const uint8_t *start = statements->base.location.start;
7673
7674 for (size_t index = parser->current_block_exits->size; index > 0; index--) {
7675 pm_node_t *block_exit = parser->current_block_exits->nodes[index - 1];
7676 if (block_exit->location.start < start) break;
7677
7678 // Implicitly remove from the list by lowering the size.
7679 parser->current_block_exits->size--;
7680 }
7681}
7682
7686static pm_until_node_t *
7687pm_until_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_token_t *do_keyword, const pm_token_t *closing, pm_node_t *predicate, pm_statements_node_t *statements, pm_node_flags_t flags) {
7688 pm_until_node_t *node = PM_NODE_ALLOC(parser, pm_until_node_t);
7689 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
7690
7691 *node = (pm_until_node_t) {
7692 {
7693 .type = PM_UNTIL_NODE,
7694 .flags = flags,
7695 .node_id = PM_NODE_IDENTIFY(parser),
7696 .location = {
7697 .start = keyword->start,
7698 .end = closing->end,
7699 },
7700 },
7701 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
7702 .do_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(do_keyword),
7703 .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
7704 .predicate = predicate,
7705 .statements = statements
7706 };
7707
7708 return node;
7709}
7710
7714static pm_until_node_t *
7715pm_until_node_modifier_create(pm_parser_t *parser, const pm_token_t *keyword, pm_node_t *predicate, pm_statements_node_t *statements, pm_node_flags_t flags) {
7716 pm_until_node_t *node = PM_NODE_ALLOC(parser, pm_until_node_t);
7717 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
7718 pm_loop_modifier_block_exits(parser, statements);
7719
7720 *node = (pm_until_node_t) {
7721 {
7722 .type = PM_UNTIL_NODE,
7723 .flags = flags,
7724 .node_id = PM_NODE_IDENTIFY(parser),
7725 .location = {
7726 .start = statements->base.location.start,
7727 .end = predicate->location.end,
7728 },
7729 },
7730 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
7731 .do_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
7732 .closing_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
7733 .predicate = predicate,
7734 .statements = statements
7735 };
7736
7737 return node;
7738}
7739
7743static pm_when_node_t *
7744pm_when_node_create(pm_parser_t *parser, const pm_token_t *keyword) {
7745 pm_when_node_t *node = PM_NODE_ALLOC(parser, pm_when_node_t);
7746
7747 *node = (pm_when_node_t) {
7748 {
7749 .type = PM_WHEN_NODE,
7750 .node_id = PM_NODE_IDENTIFY(parser),
7751 .location = {
7752 .start = keyword->start,
7753 .end = NULL
7754 }
7755 },
7756 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
7757 .statements = NULL,
7758 .then_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
7759 .conditions = { 0 }
7760 };
7761
7762 return node;
7763}
7764
7768static void
7769pm_when_node_conditions_append(pm_when_node_t *node, pm_node_t *condition) {
7770 node->base.location.end = condition->location.end;
7771 pm_node_list_append(&node->conditions, condition);
7772}
7773
7777static inline void
7778pm_when_node_then_keyword_loc_set(pm_when_node_t *node, const pm_token_t *then_keyword) {
7779 node->base.location.end = then_keyword->end;
7780 node->then_keyword_loc = PM_LOCATION_TOKEN_VALUE(then_keyword);
7781}
7782
7786static void
7787pm_when_node_statements_set(pm_when_node_t *node, pm_statements_node_t *statements) {
7788 if (statements->base.location.end > node->base.location.end) {
7789 node->base.location.end = statements->base.location.end;
7790 }
7791
7792 node->statements = statements;
7793}
7794
7798static pm_while_node_t *
7799pm_while_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_token_t *do_keyword, const pm_token_t *closing, pm_node_t *predicate, pm_statements_node_t *statements, pm_node_flags_t flags) {
7800 pm_while_node_t *node = PM_NODE_ALLOC(parser, pm_while_node_t);
7801 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
7802
7803 *node = (pm_while_node_t) {
7804 {
7805 .type = PM_WHILE_NODE,
7806 .flags = flags,
7807 .node_id = PM_NODE_IDENTIFY(parser),
7808 .location = {
7809 .start = keyword->start,
7810 .end = closing->end
7811 },
7812 },
7813 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
7814 .do_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(do_keyword),
7815 .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
7816 .predicate = predicate,
7817 .statements = statements
7818 };
7819
7820 return node;
7821}
7822
7826static pm_while_node_t *
7827pm_while_node_modifier_create(pm_parser_t *parser, const pm_token_t *keyword, pm_node_t *predicate, pm_statements_node_t *statements, pm_node_flags_t flags) {
7828 pm_while_node_t *node = PM_NODE_ALLOC(parser, pm_while_node_t);
7829 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
7830 pm_loop_modifier_block_exits(parser, statements);
7831
7832 *node = (pm_while_node_t) {
7833 {
7834 .type = PM_WHILE_NODE,
7835 .flags = flags,
7836 .node_id = PM_NODE_IDENTIFY(parser),
7837 .location = {
7838 .start = statements->base.location.start,
7839 .end = predicate->location.end
7840 },
7841 },
7842 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
7843 .do_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
7844 .closing_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
7845 .predicate = predicate,
7846 .statements = statements
7847 };
7848
7849 return node;
7850}
7851
7855static pm_while_node_t *
7856pm_while_node_synthesized_create(pm_parser_t *parser, pm_node_t *predicate, pm_statements_node_t *statements) {
7857 pm_while_node_t *node = PM_NODE_ALLOC(parser, pm_while_node_t);
7858
7859 *node = (pm_while_node_t) {
7860 {
7861 .type = PM_WHILE_NODE,
7862 .node_id = PM_NODE_IDENTIFY(parser),
7863 .location = PM_LOCATION_NULL_VALUE(parser)
7864 },
7865 .keyword_loc = PM_LOCATION_NULL_VALUE(parser),
7866 .do_keyword_loc = PM_LOCATION_NULL_VALUE(parser),
7867 .closing_loc = PM_LOCATION_NULL_VALUE(parser),
7868 .predicate = predicate,
7869 .statements = statements
7870 };
7871
7872 return node;
7873}
7874
7879static pm_x_string_node_t *
7880pm_xstring_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing, const pm_string_t *unescaped) {
7881 pm_x_string_node_t *node = PM_NODE_ALLOC(parser, pm_x_string_node_t);
7882
7883 *node = (pm_x_string_node_t) {
7884 {
7885 .type = PM_X_STRING_NODE,
7886 .flags = PM_STRING_FLAGS_FROZEN,
7887 .node_id = PM_NODE_IDENTIFY(parser),
7888 .location = {
7889 .start = opening->start,
7890 .end = closing->end
7891 },
7892 },
7893 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
7894 .content_loc = PM_LOCATION_TOKEN_VALUE(content),
7895 .closing_loc = PM_LOCATION_TOKEN_VALUE(closing),
7896 .unescaped = *unescaped
7897 };
7898
7899 return node;
7900}
7901
7905static inline pm_x_string_node_t *
7906pm_xstring_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing) {
7907 return pm_xstring_node_create_unescaped(parser, opening, content, closing, &PM_STRING_EMPTY);
7908}
7909
7913static pm_yield_node_t *
7914pm_yield_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_location_t *lparen_loc, pm_arguments_node_t *arguments, const pm_location_t *rparen_loc) {
7915 pm_yield_node_t *node = PM_NODE_ALLOC(parser, pm_yield_node_t);
7916
7917 const uint8_t *end;
7918 if (rparen_loc->start != NULL) {
7919 end = rparen_loc->end;
7920 } else if (arguments != NULL) {
7921 end = arguments->base.location.end;
7922 } else if (lparen_loc->start != NULL) {
7923 end = lparen_loc->end;
7924 } else {
7925 end = keyword->end;
7926 }
7927
7928 *node = (pm_yield_node_t) {
7929 {
7930 .type = PM_YIELD_NODE,
7931 .node_id = PM_NODE_IDENTIFY(parser),
7932 .location = {
7933 .start = keyword->start,
7934 .end = end
7935 },
7936 },
7937 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
7938 .lparen_loc = *lparen_loc,
7939 .arguments = arguments,
7940 .rparen_loc = *rparen_loc
7941 };
7942
7943 return node;
7944}
7945
7946#undef PM_NODE_ALLOC
7947#undef PM_NODE_IDENTIFY
7948
7953static int
7954pm_parser_local_depth_constant_id(pm_parser_t *parser, pm_constant_id_t constant_id) {
7955 pm_scope_t *scope = parser->current_scope;
7956 int depth = 0;
7957
7958 while (scope != NULL) {
7959 if (pm_locals_find(&scope->locals, constant_id) != UINT32_MAX) return depth;
7960 if (scope->closed) break;
7961
7962 scope = scope->previous;
7963 depth++;
7964 }
7965
7966 return -1;
7967}
7968
7974static inline int
7975pm_parser_local_depth(pm_parser_t *parser, pm_token_t *token) {
7976 return pm_parser_local_depth_constant_id(parser, pm_parser_constant_id_token(parser, token));
7977}
7978
7982static inline void
7983pm_parser_local_add(pm_parser_t *parser, pm_constant_id_t constant_id, const uint8_t *start, const uint8_t *end, uint32_t reads) {
7984 pm_locals_write(&parser->current_scope->locals, constant_id, start, end, reads);
7985}
7986
7990static pm_constant_id_t
7991pm_parser_local_add_location(pm_parser_t *parser, const uint8_t *start, const uint8_t *end, uint32_t reads) {
7992 pm_constant_id_t constant_id = pm_parser_constant_id_location(parser, start, end);
7993 if (constant_id != 0) pm_parser_local_add(parser, constant_id, start, end, reads);
7994 return constant_id;
7995}
7996
8000static inline pm_constant_id_t
8001pm_parser_local_add_token(pm_parser_t *parser, pm_token_t *token, uint32_t reads) {
8002 return pm_parser_local_add_location(parser, token->start, token->end, reads);
8003}
8004
8008static pm_constant_id_t
8009pm_parser_local_add_owned(pm_parser_t *parser, uint8_t *start, size_t length) {
8010 pm_constant_id_t constant_id = pm_parser_constant_id_owned(parser, start, length);
8011 if (constant_id != 0) pm_parser_local_add(parser, constant_id, parser->start, parser->start, 1);
8012 return constant_id;
8013}
8014
8018static pm_constant_id_t
8019pm_parser_local_add_constant(pm_parser_t *parser, const char *start, size_t length) {
8020 pm_constant_id_t constant_id = pm_parser_constant_id_constant(parser, start, length);
8021 if (constant_id != 0) pm_parser_local_add(parser, constant_id, parser->start, parser->start, 1);
8022 return constant_id;
8023}
8024
8032static bool
8033pm_parser_parameter_name_check(pm_parser_t *parser, const pm_token_t *name) {
8034 // We want to check whether the parameter name is a numbered parameter or
8035 // not.
8036 pm_refute_numbered_parameter(parser, name->start, name->end);
8037
8038 // Otherwise we'll fetch the constant id for the parameter name and check
8039 // whether it's already in the current scope.
8040 pm_constant_id_t constant_id = pm_parser_constant_id_token(parser, name);
8041
8042 if (pm_locals_find(&parser->current_scope->locals, constant_id) != UINT32_MAX) {
8043 // Add an error if the parameter doesn't start with _ and has been seen before
8044 if ((name->start < name->end) && (*name->start != '_')) {
8045 pm_parser_err_token(parser, name, PM_ERR_PARAMETER_NAME_DUPLICATED);
8046 }
8047 return true;
8048 }
8049 return false;
8050}
8051
8055static void
8056pm_parser_scope_pop(pm_parser_t *parser) {
8057 pm_scope_t *scope = parser->current_scope;
8058 parser->current_scope = scope->previous;
8059 pm_locals_free(&scope->locals);
8060 pm_node_list_free(&scope->implicit_parameters);
8061 xfree(scope);
8062}
8063
8064/******************************************************************************/
8065/* Stack helpers */
8066/******************************************************************************/
8067
8071static inline void
8072pm_state_stack_push(pm_state_stack_t *stack, bool value) {
8073 *stack = (*stack << 1) | (value & 1);
8074}
8075
8079static inline void
8080pm_state_stack_pop(pm_state_stack_t *stack) {
8081 *stack >>= 1;
8082}
8083
8087static inline bool
8088pm_state_stack_p(const pm_state_stack_t *stack) {
8089 return *stack & 1;
8090}
8091
8092static inline void
8093pm_accepts_block_stack_push(pm_parser_t *parser, bool value) {
8094 // Use the negation of the value to prevent stack overflow.
8095 pm_state_stack_push(&parser->accepts_block_stack, !value);
8096}
8097
8098static inline void
8099pm_accepts_block_stack_pop(pm_parser_t *parser) {
8100 pm_state_stack_pop(&parser->accepts_block_stack);
8101}
8102
8103static inline bool
8104pm_accepts_block_stack_p(pm_parser_t *parser) {
8105 return !pm_state_stack_p(&parser->accepts_block_stack);
8106}
8107
8108static inline void
8109pm_do_loop_stack_push(pm_parser_t *parser, bool value) {
8110 pm_state_stack_push(&parser->do_loop_stack, value);
8111}
8112
8113static inline void
8114pm_do_loop_stack_pop(pm_parser_t *parser) {
8115 pm_state_stack_pop(&parser->do_loop_stack);
8116}
8117
8118static inline bool
8119pm_do_loop_stack_p(pm_parser_t *parser) {
8120 return pm_state_stack_p(&parser->do_loop_stack);
8121}
8122
8123/******************************************************************************/
8124/* Lexer check helpers */
8125/******************************************************************************/
8126
8131static inline uint8_t
8132peek_at(const pm_parser_t *parser, const uint8_t *cursor) {
8133 if (cursor < parser->end) {
8134 return *cursor;
8135 } else {
8136 return '\0';
8137 }
8138}
8139
8145static inline uint8_t
8146peek_offset(pm_parser_t *parser, ptrdiff_t offset) {
8147 return peek_at(parser, parser->current.end + offset);
8148}
8149
8154static inline uint8_t
8155peek(const pm_parser_t *parser) {
8156 return peek_at(parser, parser->current.end);
8157}
8158
8163static inline bool
8164match(pm_parser_t *parser, uint8_t value) {
8165 if (peek(parser) == value) {
8166 parser->current.end++;
8167 return true;
8168 }
8169 return false;
8170}
8171
8176static inline size_t
8177match_eol_at(pm_parser_t *parser, const uint8_t *cursor) {
8178 if (peek_at(parser, cursor) == '\n') {
8179 return 1;
8180 }
8181 if (peek_at(parser, cursor) == '\r' && peek_at(parser, cursor + 1) == '\n') {
8182 return 2;
8183 }
8184 return 0;
8185}
8186
8192static inline size_t
8193match_eol_offset(pm_parser_t *parser, ptrdiff_t offset) {
8194 return match_eol_at(parser, parser->current.end + offset);
8195}
8196
8202static inline size_t
8203match_eol(pm_parser_t *parser) {
8204 return match_eol_at(parser, parser->current.end);
8205}
8206
/**
 * Locate the first '\n' byte within the `length` bytes starting at `cursor`.
 * Returns a pointer to it, or NULL if there is none. `length` must not be
 * negative.
 */
static inline const uint8_t *
next_newline(const uint8_t *cursor, ptrdiff_t length) {
    assert(length >= 0);
    const size_t span = (size_t) length;

    // A plain byte search is sufficient here: per the original note, no
    // supported encoding uses \n as part of a multi-byte character.
    return memchr(cursor, '\n', span);
}
8219
8223static inline bool
8224ambiguous_operator_p(const pm_parser_t *parser, bool space_seen) {
8225 return !lex_state_p(parser, PM_LEX_STATE_CLASS | PM_LEX_STATE_DOT | PM_LEX_STATE_FNAME | PM_LEX_STATE_ENDFN) && space_seen && !pm_char_is_whitespace(peek(parser));
8226}
8227
8232static bool
8233parser_lex_magic_comment_encoding_value(pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
8234 const pm_encoding_t *encoding = pm_encoding_find(start, end);
8235
8236 if (encoding != NULL) {
8237 if (parser->encoding != encoding) {
8238 parser->encoding = encoding;
8239 if (parser->encoding_changed_callback != NULL) parser->encoding_changed_callback(parser);
8240 }
8241
8242 parser->encoding_changed = (encoding != PM_ENCODING_UTF_8_ENTRY);
8243 return true;
8244 }
8245
8246 return false;
8247}
8248
8253static void
8254parser_lex_magic_comment_encoding(pm_parser_t *parser) {
8255 const uint8_t *cursor = parser->current.start + 1;
8256 const uint8_t *end = parser->current.end;
8257
8258 bool separator = false;
8259 while (true) {
8260 if (end - cursor <= 6) return;
8261 switch (cursor[6]) {
8262 case 'C': case 'c': cursor += 6; continue;
8263 case 'O': case 'o': cursor += 5; continue;
8264 case 'D': case 'd': cursor += 4; continue;
8265 case 'I': case 'i': cursor += 3; continue;
8266 case 'N': case 'n': cursor += 2; continue;
8267 case 'G': case 'g': cursor += 1; continue;
8268 case '=': case ':':
8269 separator = true;
8270 cursor += 6;
8271 break;
8272 default:
8273 cursor += 6;
8274 if (pm_char_is_whitespace(*cursor)) break;
8275 continue;
8276 }
8277 if (pm_strncasecmp(cursor - 6, (const uint8_t *) "coding", 6) == 0) break;
8278 separator = false;
8279 }
8280
8281 while (true) {
8282 do {
8283 if (++cursor >= end) return;
8284 } while (pm_char_is_whitespace(*cursor));
8285
8286 if (separator) break;
8287 if (*cursor != '=' && *cursor != ':') return;
8288
8289 separator = true;
8290 cursor++;
8291 }
8292
8293 const uint8_t *value_start = cursor;
8294 while ((*cursor == '-' || *cursor == '_' || parser->encoding->alnum_char(cursor, 1)) && ++cursor < end);
8295
8296 if (!parser_lex_magic_comment_encoding_value(parser, value_start, cursor)) {
8297 // If we were unable to parse the encoding value, then we've got an
8298 // issue because we didn't understand the encoding that the user was
8299 // trying to use. In this case we'll keep using the default encoding but
8300 // add an error to the parser to indicate an unsuccessful parse.
8301 pm_parser_err(parser, value_start, cursor, PM_ERR_INVALID_ENCODING_MAGIC_COMMENT);
8302 }
8303}
8304
/**
 * The possible outcomes of interpreting a magic comment value as a boolean.
 */
typedef enum {
    /** The value was "true" (compared case-insensitively). */
    PM_MAGIC_COMMENT_BOOLEAN_VALUE_TRUE,

    /** The value was "false" (compared case-insensitively). */
    PM_MAGIC_COMMENT_BOOLEAN_VALUE_FALSE,

    /** The value was neither "true" nor "false". */
    PM_MAGIC_COMMENT_BOOLEAN_VALUE_INVALID
} pm_magic_comment_boolean_value_t;
8310
8315static pm_magic_comment_boolean_value_t
8316parser_lex_magic_comment_boolean_value(const uint8_t *value_start, uint32_t value_length) {
8317 if (value_length == 4 && pm_strncasecmp(value_start, (const uint8_t *) "true", 4) == 0) {
8318 return PM_MAGIC_COMMENT_BOOLEAN_VALUE_TRUE;
8319 } else if (value_length == 5 && pm_strncasecmp(value_start, (const uint8_t *) "false", 5) == 0) {
8320 return PM_MAGIC_COMMENT_BOOLEAN_VALUE_FALSE;
8321 } else {
8322 return PM_MAGIC_COMMENT_BOOLEAN_VALUE_INVALID;
8323 }
8324}
8325
/**
 * Report whether a byte delimits a magic comment key: a single quote, a
 * double quote, a colon, or a semicolon.
 */
static inline bool
pm_char_is_magic_comment_key_delimiter(const uint8_t b) {
    switch (b) {
        case '\'':
        case '"':
        case ':':
        case ';':
            return true;
        default:
            return false;
    }
}
8330
8336static inline const uint8_t *
8337parser_lex_magic_comment_emacs_marker(pm_parser_t *parser, const uint8_t *cursor, const uint8_t *end) {
8338 while ((cursor + 3 <= end) && (cursor = pm_memchr(cursor, '-', (size_t) (end - cursor), parser->encoding_changed, parser->encoding)) != NULL) {
8339 if (cursor + 3 <= end && cursor[1] == '*' && cursor[2] == '-') {
8340 return cursor;
8341 }
8342 cursor++;
8343 }
8344 return NULL;
8345}
8346
8357static inline bool
8358parser_lex_magic_comment(pm_parser_t *parser, bool semantic_token_seen) {
8359 bool result = true;
8360
8361 const uint8_t *start = parser->current.start + 1;
8362 const uint8_t *end = parser->current.end;
8363 if (end - start <= 7) return false;
8364
8365 const uint8_t *cursor;
8366 bool indicator = false;
8367
8368 if ((cursor = parser_lex_magic_comment_emacs_marker(parser, start, end)) != NULL) {
8369 start = cursor + 3;
8370
8371 if ((cursor = parser_lex_magic_comment_emacs_marker(parser, start, end)) != NULL) {
8372 end = cursor;
8373 indicator = true;
8374 } else {
8375 // If we have a start marker but not an end marker, then we cannot
8376 // have a magic comment.
8377 return false;
8378 }
8379 }
8380
8381 cursor = start;
8382 while (cursor < end) {
8383 while (cursor < end && (pm_char_is_magic_comment_key_delimiter(*cursor) || pm_char_is_whitespace(*cursor))) cursor++;
8384
8385 const uint8_t *key_start = cursor;
8386 while (cursor < end && (!pm_char_is_magic_comment_key_delimiter(*cursor) && !pm_char_is_whitespace(*cursor))) cursor++;
8387
8388 const uint8_t *key_end = cursor;
8389 while (cursor < end && pm_char_is_whitespace(*cursor)) cursor++;
8390 if (cursor == end) break;
8391
8392 if (*cursor == ':') {
8393 cursor++;
8394 } else {
8395 if (!indicator) return false;
8396 continue;
8397 }
8398
8399 while (cursor < end && pm_char_is_whitespace(*cursor)) cursor++;
8400 if (cursor == end) break;
8401
8402 const uint8_t *value_start;
8403 const uint8_t *value_end;
8404
8405 if (*cursor == '"') {
8406 value_start = ++cursor;
8407 for (; cursor < end && *cursor != '"'; cursor++) {
8408 if (*cursor == '\\' && (cursor + 1 < end)) cursor++;
8409 }
8410 value_end = cursor;
8411 if (*cursor == '"') cursor++;
8412 } else {
8413 value_start = cursor;
8414 while (cursor < end && *cursor != '"' && *cursor != ';' && !pm_char_is_whitespace(*cursor)) cursor++;
8415 value_end = cursor;
8416 }
8417
8418 if (indicator) {
8419 while (cursor < end && (*cursor == ';' || pm_char_is_whitespace(*cursor))) cursor++;
8420 } else {
8421 while (cursor < end && pm_char_is_whitespace(*cursor)) cursor++;
8422 if (cursor != end) return false;
8423 }
8424
8425 // Here, we need to do some processing on the key to swap out dashes for
8426 // underscores. We only need to do this if there _is_ a dash in the key.
8427 pm_string_t key;
8428 const size_t key_length = (size_t) (key_end - key_start);
8429 const uint8_t *dash = pm_memchr(key_start, '-', key_length, parser->encoding_changed, parser->encoding);
8430
8431 if (dash == NULL) {
8432 pm_string_shared_init(&key, key_start, key_end);
8433 } else {
8434 uint8_t *buffer = xmalloc(key_length);
8435 if (buffer == NULL) break;
8436
8437 memcpy(buffer, key_start, key_length);
8438 buffer[dash - key_start] = '_';
8439
8440 while ((dash = pm_memchr(dash + 1, '-', (size_t) (key_end - dash - 1), parser->encoding_changed, parser->encoding)) != NULL) {
8441 buffer[dash - key_start] = '_';
8442 }
8443
8444 pm_string_owned_init(&key, buffer, key_length);
8445 }
8446
8447 // Finally, we can start checking the key against the list of known
8448 // magic comment keys, and potentially change state based on that.
8449 const uint8_t *key_source = pm_string_source(&key);
8450 uint32_t value_length = (uint32_t) (value_end - value_start);
8451
8452 // We only want to attempt to compare against encoding comments if it's
8453 // the first line in the file (or the second in the case of a shebang).
8454 if (parser->current.start == parser->encoding_comment_start && !parser->encoding_locked) {
8455 if (
8456 (key_length == 8 && pm_strncasecmp(key_source, (const uint8_t *) "encoding", 8) == 0) ||
8457 (key_length == 6 && pm_strncasecmp(key_source, (const uint8_t *) "coding", 6) == 0)
8458 ) {
8459 result = parser_lex_magic_comment_encoding_value(parser, value_start, value_end);
8460 }
8461 }
8462
8463 if (key_length == 11) {
8464 if (pm_strncasecmp(key_source, (const uint8_t *) "warn_indent", 11) == 0) {
8465 switch (parser_lex_magic_comment_boolean_value(value_start, value_length)) {
8466 case PM_MAGIC_COMMENT_BOOLEAN_VALUE_INVALID:
8467 PM_PARSER_WARN_TOKEN_FORMAT(
8468 parser,
8469 parser->current,
8470 PM_WARN_INVALID_MAGIC_COMMENT_VALUE,
8471 (int) key_length,
8472 (const char *) key_source,
8473 (int) value_length,
8474 (const char *) value_start
8475 );
8476 break;
8477 case PM_MAGIC_COMMENT_BOOLEAN_VALUE_FALSE:
8478 parser->warn_mismatched_indentation = false;
8479 break;
8480 case PM_MAGIC_COMMENT_BOOLEAN_VALUE_TRUE:
8481 parser->warn_mismatched_indentation = true;
8482 break;
8483 }
8484 }
8485 } else if (key_length == 21) {
8486 if (pm_strncasecmp(key_source, (const uint8_t *) "frozen_string_literal", 21) == 0) {
8487 // We only want to handle frozen string literal comments if it's
8488 // before any semantic tokens have been seen.
8489 if (semantic_token_seen) {
8490 pm_parser_warn_token(parser, &parser->current, PM_WARN_IGNORED_FROZEN_STRING_LITERAL);
8491 } else {
8492 switch (parser_lex_magic_comment_boolean_value(value_start, value_length)) {
8493 case PM_MAGIC_COMMENT_BOOLEAN_VALUE_INVALID:
8494 PM_PARSER_WARN_TOKEN_FORMAT(
8495 parser,
8496 parser->current,
8497 PM_WARN_INVALID_MAGIC_COMMENT_VALUE,
8498 (int) key_length,
8499 (const char *) key_source,
8500 (int) value_length,
8501 (const char *) value_start
8502 );
8503 break;
8504 case PM_MAGIC_COMMENT_BOOLEAN_VALUE_FALSE:
8506 break;
8507 case PM_MAGIC_COMMENT_BOOLEAN_VALUE_TRUE:
8509 break;
8510 }
8511 }
8512 }
8513 } else if (key_length == 24) {
8514 if (pm_strncasecmp(key_source, (const uint8_t *) "shareable_constant_value", 24) == 0) {
8515 const uint8_t *cursor = parser->current.start;
8516 while ((cursor > parser->start) && ((cursor[-1] == ' ') || (cursor[-1] == '\t'))) cursor--;
8517
8518 if (!((cursor == parser->start) || (cursor[-1] == '\n'))) {
8519 pm_parser_warn_token(parser, &parser->current, PM_WARN_SHAREABLE_CONSTANT_VALUE_LINE);
8520 } else if (value_length == 4 && pm_strncasecmp(value_start, (const uint8_t *) "none", 4) == 0) {
8521 pm_parser_scope_shareable_constant_set(parser, PM_SCOPE_SHAREABLE_CONSTANT_NONE);
8522 } else if (value_length == 7 && pm_strncasecmp(value_start, (const uint8_t *) "literal", 7) == 0) {
8523 pm_parser_scope_shareable_constant_set(parser, PM_SCOPE_SHAREABLE_CONSTANT_LITERAL);
8524 } else if (value_length == 23 && pm_strncasecmp(value_start, (const uint8_t *) "experimental_everything", 23) == 0) {
8525 pm_parser_scope_shareable_constant_set(parser, PM_SCOPE_SHAREABLE_CONSTANT_EXPERIMENTAL_EVERYTHING);
8526 } else if (value_length == 17 && pm_strncasecmp(value_start, (const uint8_t *) "experimental_copy", 17) == 0) {
8527 pm_parser_scope_shareable_constant_set(parser, PM_SCOPE_SHAREABLE_CONSTANT_EXPERIMENTAL_COPY);
8528 } else {
8529 PM_PARSER_WARN_TOKEN_FORMAT(
8530 parser,
8531 parser->current,
8532 PM_WARN_INVALID_MAGIC_COMMENT_VALUE,
8533 (int) key_length,
8534 (const char *) key_source,
8535 (int) value_length,
8536 (const char *) value_start
8537 );
8538 }
8539 }
8540 }
8541
8542 // When we're done, we want to free the string in case we had to
8543 // allocate memory for it.
8544 pm_string_free(&key);
8545
8546 // Allocate a new magic comment node to append to the parser's list.
8548 if ((magic_comment = (pm_magic_comment_t *) xcalloc(1, sizeof(pm_magic_comment_t))) != NULL) {
8549 magic_comment->key_start = key_start;
8550 magic_comment->value_start = value_start;
8551 magic_comment->key_length = (uint32_t) key_length;
8552 magic_comment->value_length = value_length;
8553 pm_list_append(&parser->magic_comment_list, (pm_list_node_t *) magic_comment);
8554 }
8555 }
8556
8557 return result;
8558}
8559
8560/******************************************************************************/
8561/* Context manipulations */
8562/******************************************************************************/
8563
// Reports whether the given token terminates the given context. The switch
// maps a context to a comparison against one or more token types and returns
// the result of that comparison. PM_CONTEXT_NONE, and any value that matches
// no case, yields false.
//
// NOTE(review): the line numbers embedded in this listing skip values inside
// this function (for example 8567 -> 8569, 8571 -> 8573, 8589 -> 8597,
// 8617 -> 8625), and several return statements follow one another without a
// case label in between. Some case labels therefore appear to be missing
// from the visible text; confirm the context-to-token grouping against the
// original file before relying on it.
8564static bool
8565context_terminator(pm_context_t context, pm_token_t *token) {
8566 switch (context) {
8567 case PM_CONTEXT_MAIN:
8569 case PM_CONTEXT_DEFINED:
8571 case PM_CONTEXT_TERNARY:
8573 return token->type == PM_TOKEN_EOF;
8575 return token->type == PM_TOKEN_COMMA || token->type == PM_TOKEN_PARENTHESIS_RIGHT;
8576 case PM_CONTEXT_PREEXE:
8577 case PM_CONTEXT_POSTEXE:
8578 return token->type == PM_TOKEN_BRACE_RIGHT;
8579 case PM_CONTEXT_MODULE:
8580 case PM_CONTEXT_CLASS:
8581 case PM_CONTEXT_SCLASS:
8583 case PM_CONTEXT_DEF:
8585 return token->type == PM_TOKEN_KEYWORD_END || token->type == PM_TOKEN_KEYWORD_RESCUE || token->type == PM_TOKEN_KEYWORD_ENSURE;
8586 case PM_CONTEXT_WHILE:
8587 case PM_CONTEXT_UNTIL:
8588 case PM_CONTEXT_ELSE:
8589 case PM_CONTEXT_FOR:
8597 return token->type == PM_TOKEN_KEYWORD_END;
 // NOTE(review): as shown, the next three returns have no case label of
 // their own; their labels are presumably among the lines missing from
 // this listing.
8599 return token->type == PM_TOKEN_KEYWORD_DO || token->type == PM_TOKEN_KEYWORD_THEN;
8601 return token->type == PM_TOKEN_KEYWORD_IN;
8603 return token->type == PM_TOKEN_KEYWORD_WHEN || token->type == PM_TOKEN_KEYWORD_END || token->type == PM_TOKEN_KEYWORD_ELSE;
8604 case PM_CONTEXT_CASE_IN:
8605 return token->type == PM_TOKEN_KEYWORD_IN || token->type == PM_TOKEN_KEYWORD_END || token->type == PM_TOKEN_KEYWORD_ELSE;
8606 case PM_CONTEXT_IF:
8607 case PM_CONTEXT_ELSIF:
8608 return token->type == PM_TOKEN_KEYWORD_ELSE || token->type == PM_TOKEN_KEYWORD_ELSIF || token->type == PM_TOKEN_KEYWORD_END;
8609 case PM_CONTEXT_UNLESS:
8610 return token->type == PM_TOKEN_KEYWORD_ELSE || token->type == PM_TOKEN_KEYWORD_END;
8611 case PM_CONTEXT_EMBEXPR:
8612 return token->type == PM_TOKEN_EMBEXPR_END;
8614 return token->type == PM_TOKEN_BRACE_RIGHT;
8615 case PM_CONTEXT_PARENS:
8616 return token->type == PM_TOKEN_PARENTHESIS_RIGHT;
8617 case PM_CONTEXT_BEGIN:
8625 return token->type == PM_TOKEN_KEYWORD_ENSURE || token->type == PM_TOKEN_KEYWORD_RESCUE || token->type == PM_TOKEN_KEYWORD_ELSE || token->type == PM_TOKEN_KEYWORD_END;
8633 return token->type == PM_TOKEN_KEYWORD_ENSURE || token->type == PM_TOKEN_KEYWORD_END;
8635 return token->type == PM_TOKEN_BRACE_RIGHT;
8637 return token->type == PM_TOKEN_KEYWORD_THEN || token->type == PM_TOKEN_NEWLINE || token->type == PM_TOKEN_SEMICOLON;
8638 case PM_CONTEXT_NONE:
8639 return false;
8640 }
8641
 // Reached only when the context value matched none of the cases above.
8642 return false;
8643}
8644
8649static pm_context_t
8650context_recoverable(const pm_parser_t *parser, pm_token_t *token) {
8651 pm_context_node_t *context_node = parser->current_context;
8652
8653 while (context_node != NULL) {
8654 if (context_terminator(context_node->context, token)) return context_node->context;
8655 context_node = context_node->prev;
8656 }
8657
8658 return PM_CONTEXT_NONE;
8659}
8660
8661static bool
8662context_push(pm_parser_t *parser, pm_context_t context) {
8663 pm_context_node_t *context_node = (pm_context_node_t *) xmalloc(sizeof(pm_context_node_t));
8664 if (context_node == NULL) return false;
8665
8666 *context_node = (pm_context_node_t) { .context = context, .prev = NULL };
8667
8668 if (parser->current_context == NULL) {
8669 parser->current_context = context_node;
8670 } else {
8671 context_node->prev = parser->current_context;
8672 parser->current_context = context_node;
8673 }
8674
8675 return true;
8676}
8677
8678static void
8679context_pop(pm_parser_t *parser) {
8680 pm_context_node_t *prev = parser->current_context->prev;
8681 xfree(parser->current_context);
8682 parser->current_context = prev;
8683}
8684
8685static bool
8686context_p(const pm_parser_t *parser, pm_context_t context) {
8687 pm_context_node_t *context_node = parser->current_context;
8688
8689 while (context_node != NULL) {
8690 if (context_node->context == context) return true;
8691 context_node = context_node->prev;
8692 }
8693
8694 return false;
8695}
8696
// Walks the context stack from parser->current_context through the prev
// links. Returns true as soon as a PM_CONTEXT_DEF node is found, and false as
// soon as a PM_CONTEXT_CLASS, PM_CONTEXT_MODULE or PM_CONTEXT_SCLASS node is
// found. Any other context is skipped. Returns false if the stack is
// exhausted.
//
// NOTE(review): the embedded line numbers skip between the case labels below
// (8703 -> 8708, 8709 -> 8713, 8713 -> 8717, 8717 -> 8721), so additional
// case labels appear to be missing from this listing. Confirm the full set
// against the original file.
8697static bool
8698context_def_p(const pm_parser_t *parser) {
8699 pm_context_node_t *context_node = parser->current_context;
8700
8701 while (context_node != NULL) {
8702 switch (context_node->context) {
8703 case PM_CONTEXT_DEF:
8708 return true;
8709 case PM_CONTEXT_CLASS:
8713 case PM_CONTEXT_MODULE:
8717 case PM_CONTEXT_SCLASS:
8721 return false;
8722 default:
 // Not a deciding context: keep looking further down the stack.
8723 context_node = context_node->prev;
8724 }
8725 }
8726
8727 return false;
8728}
8729
// Returns a string literal describing the given context in human-readable
// form. PM_CONTEXT_NONE is treated as unreachable: it trips an assert and,
// when asserts are compiled out, yields the empty string. The same applies
// to any value not handled by the switch.
//
// NOTE(review): the embedded line numbers skip inside the switch
// (8750 -> 8757, 8759 -> 8766, 8779 -> 8787), so some case labels that share
// the "'else' clause", "'ensure' clause" and "'rescue' clause" strings appear
// to be missing from this listing. Confirm against the original file.
8734static const char *
8735context_human(pm_context_t context) {
8736 switch (context) {
8737 case PM_CONTEXT_NONE:
8738 assert(false && "unreachable");
8739 return "";
8740 case PM_CONTEXT_BEGIN: return "begin statement";
8741 case PM_CONTEXT_BLOCK_BRACES: return "'{'..'}' block";
8742 case PM_CONTEXT_BLOCK_KEYWORDS: return "'do'..'end' block";
8743 case PM_CONTEXT_CASE_WHEN: return "'when' clause";
8744 case PM_CONTEXT_CASE_IN: return "'in' clause";
8745 case PM_CONTEXT_CLASS: return "class definition";
8746 case PM_CONTEXT_DEF: return "method definition";
8747 case PM_CONTEXT_DEF_PARAMS: return "method parameters";
8748 case PM_CONTEXT_DEFAULT_PARAMS: return "parameter default value";
8749 case PM_CONTEXT_DEFINED: return "'defined?' expression";
 // PM_CONTEXT_ELSE has no return of its own and shares the string of the
 // next visible label.
8750 case PM_CONTEXT_ELSE:
8757 case PM_CONTEXT_SCLASS_ELSE: return "'else' clause";
8758 case PM_CONTEXT_ELSIF: return "'elsif' clause";
8759 case PM_CONTEXT_EMBEXPR: return "embedded expression";
8766 case PM_CONTEXT_SCLASS_ENSURE: return "'ensure' clause";
8767 case PM_CONTEXT_FOR: return "for loop";
8768 case PM_CONTEXT_FOR_INDEX: return "for loop index";
8769 case PM_CONTEXT_IF: return "if statement";
8770 case PM_CONTEXT_LAMBDA_BRACES: return "'{'..'}' lambda block";
8771 case PM_CONTEXT_LAMBDA_DO_END: return "'do'..'end' lambda block";
8772 case PM_CONTEXT_LOOP_PREDICATE: return "loop predicate";
8773 case PM_CONTEXT_MAIN: return "top level context";
8774 case PM_CONTEXT_MODULE: return "module definition";
8775 case PM_CONTEXT_MULTI_TARGET: return "multiple targets";
8776 case PM_CONTEXT_PARENS: return "parentheses";
8777 case PM_CONTEXT_POSTEXE: return "'END' block";
8778 case PM_CONTEXT_PREDICATE: return "predicate";
8779 case PM_CONTEXT_PREEXE: return "'BEGIN' block";
8787 case PM_CONTEXT_SCLASS_RESCUE: return "'rescue' clause";
8788 case PM_CONTEXT_SCLASS: return "singleton class definition";
8789 case PM_CONTEXT_TERNARY: return "ternary expression";
8790 case PM_CONTEXT_UNLESS: return "unless statement";
8791 case PM_CONTEXT_UNTIL: return "until statement";
8792 case PM_CONTEXT_WHILE: return "while statement";
8793 }
8794
 // Fallback for values outside the cases handled above.
8795 assert(false && "unreachable");
8796 return "";
8797}
8798
8799/******************************************************************************/
8800/* Specific token lexers */
8801/******************************************************************************/
8802
8803static inline void
8804pm_strspn_number_validate(pm_parser_t *parser, const uint8_t *string, size_t length, const uint8_t *invalid) {
8805 if (invalid != NULL) {
8806 pm_diagnostic_id_t diag_id = (invalid == (string + length - 1)) ? PM_ERR_INVALID_NUMBER_UNDERSCORE_TRAILING : PM_ERR_INVALID_NUMBER_UNDERSCORE_INNER;
8807 pm_parser_err(parser, invalid, invalid + 1, diag_id);
8808 }
8809}
8810
8811static size_t
8812pm_strspn_binary_number_validate(pm_parser_t *parser, const uint8_t *string) {
8813 const uint8_t *invalid = NULL;
8814 size_t length = pm_strspn_binary_number(string, parser->end - string, &invalid);
8815 pm_strspn_number_validate(parser, string, length, invalid);
8816 return length;
8817}
8818
8819static size_t
8820pm_strspn_octal_number_validate(pm_parser_t *parser, const uint8_t *string) {
8821 const uint8_t *invalid = NULL;
8822 size_t length = pm_strspn_octal_number(string, parser->end - string, &invalid);
8823 pm_strspn_number_validate(parser, string, length, invalid);
8824 return length;
8825}
8826
8827static size_t
8828pm_strspn_decimal_number_validate(pm_parser_t *parser, const uint8_t *string) {
8829 const uint8_t *invalid = NULL;
8830 size_t length = pm_strspn_decimal_number(string, parser->end - string, &invalid);
8831 pm_strspn_number_validate(parser, string, length, invalid);
8832 return length;
8833}
8834
8835static size_t
8836pm_strspn_hexadecimal_number_validate(pm_parser_t *parser, const uint8_t *string) {
8837 const uint8_t *invalid = NULL;
8838 size_t length = pm_strspn_hexadecimal_number(string, parser->end - string, &invalid);
8839 pm_strspn_number_validate(parser, string, length, invalid);
8840 return length;
8841}
8842
// Tries to extend the current token over an optional fractional part ('.'
// followed by at least one decimal digit) and an optional exponent ('e' or
// 'E', an optional '+' or '-', then decimal digits). Sets *seen_e to true
// when an exponent was consumed. A '.' or 'e' that is not followed by the
// required digit(s) leaves the token where it is and returns immediately.
//
// NOTE(review): `type` is returned but never declared or assigned in the
// visible text. The embedded line numbers skip at 8844 -> 8846,
// 8852 -> 8854 and 8880 -> 8882, so its declaration and the assignments that
// presumably mark the token as a float appear to be missing from this
// listing. Confirm against the original file.
8843static pm_token_type_t
8844lex_optional_float_suffix(pm_parser_t *parser, bool* seen_e) {
8846
8847 // Here we're going to attempt to parse the optional decimal portion of a
8848 // float. If it's not there, then it's okay and we'll just continue on.
8849 if (peek(parser) == '.') {
8850 if (pm_char_is_decimal_digit(peek_offset(parser, 1))) {
 // Skip the '.' and the first digit, then the rest of the digits.
8851 parser->current.end += 2;
8852 parser->current.end += pm_strspn_decimal_number_validate(parser, parser->current.end);
8854 } else {
8855 // If we had a . and then something else, then it's not a float
8856 // suffix on a number it's a method call or something else.
8857 return type;
8858 }
8859 }
8860
8861 // Here we're going to attempt to parse the optional exponent portion of a
8862 // float. If it's not there, it's okay and we'll just continue on.
8863 if ((peek(parser) == 'e') || (peek(parser) == 'E')) {
8864 if ((peek_offset(parser, 1) == '+') || (peek_offset(parser, 1) == '-')) {
 // Skip the 'e' and the sign; a digit must follow or it is an error.
8865 parser->current.end += 2;
8866
8867 if (pm_char_is_decimal_digit(peek(parser))) {
8868 parser->current.end++;
8869 parser->current.end += pm_strspn_decimal_number_validate(parser, parser->current.end);
8870 } else {
8871 pm_parser_err_current(parser, PM_ERR_INVALID_FLOAT_EXPONENT);
8872 }
8873 } else if (pm_char_is_decimal_digit(peek_offset(parser, 1))) {
 // Unsigned exponent: skip the 'e', then consume the digits.
8874 parser->current.end++;
8875 parser->current.end += pm_strspn_decimal_number_validate(parser, parser->current.end);
8876 } else {
 // An 'e' not followed by a sign or digit is not part of the number.
8877 return type;
8878 }
8879
8880 *seen_e = true;
8882 }
8883
8884 return type;
8885}
8886
// Lexes the body of a numeric literal, advancing parser->current.end.
// *seen_e is reset to false up front. If the byte before current.end is '0',
// the next byte selects the form: d/D decimal, b/B binary, o/O octal, a
// digit 0-7 or '_' for leading-zero octal, x/X hexadecimal, and '.' or e/E
// for a float suffix. Otherwise decimal digits are consumed, followed by an
// optional float suffix. Finally, a '.' followed by a digit directly after
// the completed number is reported as PM_ERR_INVALID_NUMBER_FRACTION.
//
// NOTE(review): `type` is assigned and returned but its declaration is not
// visible. The embedded line numbers skip at 8888 -> 8890, 8916 -> 8919,
// 8930 -> 8933, 8945 -> 8947 and 8958 -> 8961, so the declaration and one
// statement in each of the binary, octal and hexadecimal branches appear to
// be missing from this listing. Confirm against the original file.
8887static pm_token_type_t
8888lex_numeric_prefix(pm_parser_t *parser, bool* seen_e) {
8890 *seen_e = false;
8891
8892 if (peek_offset(parser, -1) == '0') {
8893 switch (*parser->current.end) {
8894 // 0d1111 is a decimal number
8895 case 'd':
8896 case 'D':
8897 parser->current.end++;
8898 if (pm_char_is_decimal_digit(peek(parser))) {
8899 parser->current.end += pm_strspn_decimal_number_validate(parser, parser->current.end);
8900 } else {
 // No digit after the prefix: consume a stray '_' if present so
 // the error covers it, then report the invalid literal.
8901 match(parser, '_');
8902 pm_parser_err_current(parser, PM_ERR_INVALID_NUMBER_DECIMAL);
8903 }
8904
8905 break;
8906
8907 // 0b1111 is a binary number
8908 case 'b':
8909 case 'B':
8910 parser->current.end++;
8911 if (pm_char_is_binary_digit(peek(parser))) {
8912 parser->current.end += pm_strspn_binary_number_validate(parser, parser->current.end);
8913 } else {
8914 match(parser, '_');
8915 pm_parser_err_current(parser, PM_ERR_INVALID_NUMBER_BINARY);
8916 }
8917
8919 break;
8920
8921 // 0o1111 is an octal number
8922 case 'o':
8923 case 'O':
8924 parser->current.end++;
8925 if (pm_char_is_octal_digit(peek(parser))) {
8926 parser->current.end += pm_strspn_octal_number_validate(parser, parser->current.end);
8927 } else {
8928 match(parser, '_');
8929 pm_parser_err_current(parser, PM_ERR_INVALID_NUMBER_OCTAL);
8930 }
8931
8933 break;
8934
8935 // 01111 is an octal number
8936 case '_':
8937 case '0':
8938 case '1':
8939 case '2':
8940 case '3':
8941 case '4':
8942 case '5':
8943 case '6':
8944 case '7':
8945 parser->current.end += pm_strspn_octal_number_validate(parser, parser->current.end);
8947 break;
8948
8949 // 0x1111 is a hexadecimal number
8950 case 'x':
8951 case 'X':
8952 parser->current.end++;
8953 if (pm_char_is_hexadecimal_digit(peek(parser))) {
8954 parser->current.end += pm_strspn_hexadecimal_number_validate(parser, parser->current.end);
8955 } else {
8956 match(parser, '_');
8957 pm_parser_err_current(parser, PM_ERR_INVALID_NUMBER_HEXADECIMAL);
8958 }
8959
8961 break;
8962
8963 // 0.xxx is a float
8964 case '.': {
8965 type = lex_optional_float_suffix(parser, seen_e);
8966 break;
8967 }
8968
8969 // 0exxx is a float
8970 case 'e':
8971 case 'E': {
8972 type = lex_optional_float_suffix(parser, seen_e);
8973 break;
8974 }
8975 }
8976 } else {
8977 // If it didn't start with a 0, then we'll lex as far as we can into a
8978 // decimal number.
8979 parser->current.end += pm_strspn_decimal_number_validate(parser, parser->current.end);
8980
8981 // Afterward, we'll lex as far as we can into an optional float suffix.
8982 type = lex_optional_float_suffix(parser, seen_e);
8983 }
8984
8985 // At this point we have a completed number, but we want to provide the user
8986 // with a good experience if they put an additional .xxx fractional
8987 // component on the end, so we'll check for that here.
8988 if (peek_offset(parser, 0) == '.' && pm_char_is_decimal_digit(peek_offset(parser, 1))) {
 // The error spans the '.', the first digit and any further digits.
 // current.end itself is not moved.
8989 const uint8_t *fraction_start = parser->current.end;
8990 const uint8_t *fraction_end = parser->current.end + 2;
8991 fraction_end += pm_strspn_decimal_digit(fraction_end, parser->end - fraction_end);
8992 pm_parser_err(parser, fraction_start, fraction_end, PM_ERR_INVALID_NUMBER_FRACTION);
8993 }
8994
8995 return type;
8996}
8997
// Lexes a numeric literal. When input remains, the body is lexed with
// lex_numeric_prefix and then the optional 'r' and 'i' suffixes are tried.
// For non-integer types the 'r' suffix is only tried when no exponent was
// seen. If the byte following the suffix is an ASCII letter, '_' or a byte
// >= 0x80, the suffix is rejected: current.end is rewound to the end of the
// number and the unsuffixed type is kept. Otherwise the suffixed type is
// returned.
//
// NOTE(review): `type` is used but its declaration is not visible, and the
// two inner `if (match(parser, 'i'))` bodies are empty as shown. The
// embedded line numbers skip at 8999 -> 9002, 9014 -> 9016 and
// 9024 -> 9026, so those lines appear to be missing from this listing.
// Confirm against the original file.
8998static pm_token_type_t
8999lex_numeric(pm_parser_t *parser) {
9002
9003 if (parser->current.end < parser->end) {
9004 bool seen_e = false;
9005 type = lex_numeric_prefix(parser, &seen_e);
9006
 // Remember where the number itself ends so a rejected suffix can be
 // undone below.
9007 const uint8_t *end = parser->current.end;
9008 pm_token_type_t suffix_type = type;
9009
9010 if (type == PM_TOKEN_INTEGER) {
9011 if (match(parser, 'r')) {
9012 suffix_type = PM_TOKEN_INTEGER_RATIONAL;
9013
9014 if (match(parser, 'i')) {
9016 }
9017 } else if (match(parser, 'i')) {
9018 suffix_type = PM_TOKEN_INTEGER_IMAGINARY;
9019 }
9020 } else {
9021 if (!seen_e && match(parser, 'r')) {
9022 suffix_type = PM_TOKEN_FLOAT_RATIONAL;
9023
9024 if (match(parser, 'i')) {
9026 }
9027 } else if (match(parser, 'i')) {
9028 suffix_type = PM_TOKEN_FLOAT_IMAGINARY;
9029 }
9030 }
9031
 // A suffix directly followed by a letter, '_' or a non-ASCII byte is
 // not treated as a suffix.
9032 const uint8_t b = peek(parser);
9033 if (b != '\0' && (b >= 0x80 || ((b >= 'a' && b <= 'z') || (b >= 'A' && b <= 'Z')) || b == '_')) {
9034 parser->current.end = end;
9035 } else {
9036 type = suffix_type;
9037 }
9038 }
9039
9040 return type;
9041}
9042
// Lexes the part of a global variable that follows the '$', dispatching on
// the byte at parser->current.end: single-character punctuation globals,
// back references ($&, $`, $', $+), $0, numbered references ($1..$9 followed
// by more digits), "$-" followed by a single identifier character, and
// ordinary identifier-named globals. Back references and numbered references
// are returned as PM_TOKEN_GLOBAL_VARIABLE instead when the lexer is in the
// PM_LEX_STATE_FNAME state. A bare '$' (at end of input or followed by
// whitespace) and unrecognized characters produce diagnostics.
//
// NOTE(review): the embedded line numbers skip at 9046 -> 9048,
// 9071 -> 9073, 9094 -> 9096 and 9132 -> 9134. The statements that end those
// branches are not visible in this listing; presumably each is a return.
// Confirm against the original file.
9043static pm_token_type_t
9044lex_global_variable(pm_parser_t *parser) {
9045 if (parser->current.end >= parser->end) {
9046 pm_parser_err_token(parser, &parser->current, PM_ERR_GLOBAL_VARIABLE_BARE);
9048 }
9049
9050 // True if multiple characters are allowed after the declaration of the
9051 // global variable. Not true when it starts with "$-".
9052 bool allow_multiple = true;
9053
9054 switch (*parser->current.end) {
9055 case '~': // $~: match-data
9056 case '*': // $*: argv
9057 case '$': // $$: pid
9058 case '?': // $?: last status
9059 case '!': // $!: error string
9060 case '@': // $@: error position
9061 case '/': // $/: input record separator
9062 case '\\': // $\: output record separator
9063 case ';': // $;: field separator
9064 case ',': // $,: output field separator
9065 case '.': // $.: last read line number
9066 case '=': // $=: ignorecase
9067 case ':': // $:: load path
9068 case '<': // $<: reading filename
9069 case '>': // $>: default output handle
9070 case '\"': // $": already loaded files
9071 parser->current.end++;
9073
9074 case '&': // $&: last match
9075 case '`': // $`: string before last match
9076 case '\'': // $': string after last match
9077 case '+': // $+: string matches last paren.
9078 parser->current.end++;
9079 return lex_state_p(parser, PM_LEX_STATE_FNAME) ? PM_TOKEN_GLOBAL_VARIABLE : PM_TOKEN_BACK_REFERENCE;
9080
9081 case '0': {
9082 parser->current.end++;
9083 size_t width;
9084
9085 if (parser->current.end < parser->end && (width = char_is_identifier(parser, parser->current.end)) > 0) {
 // Consume the whole trailing identifier so the error below
 // covers all of it.
9086 do {
9087 parser->current.end += width;
9088 } while (parser->current.end < parser->end && (width = char_is_identifier(parser, parser->current.end)) > 0);
9089
9090 // $0 isn't allowed to be followed by anything.
9091 pm_diagnostic_id_t diag_id = parser->version == PM_OPTIONS_VERSION_CRUBY_3_3 ? PM_ERR_INVALID_VARIABLE_GLOBAL_3_3 : PM_ERR_INVALID_VARIABLE_GLOBAL;
9092 PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, parser->current, diag_id);
9093 }
9094
9096 }
9097
9098 case '1':
9099 case '2':
9100 case '3':
9101 case '4':
9102 case '5':
9103 case '6':
9104 case '7':
9105 case '8':
9106 case '9':
9107 parser->current.end += pm_strspn_decimal_digit(parser->current.end, parser->end - parser->current.end);
9108 return lex_state_p(parser, PM_LEX_STATE_FNAME) ? PM_TOKEN_GLOBAL_VARIABLE : PM_TOKEN_NUMBERED_REFERENCE;
9109
9110 case '-':
9111 parser->current.end++;
9112 allow_multiple = false;
9113 /* fallthrough */
9114 default: {
9115 size_t width;
9116
9117 if ((width = char_is_identifier(parser, parser->current.end)) > 0) {
 // With allow_multiple false (the "$-" form) the loop body runs
 // exactly once, consuming a single identifier character.
9118 do {
9119 parser->current.end += width;
9120 } while (allow_multiple && parser->current.end < parser->end && (width = char_is_identifier(parser, parser->current.end)) > 0);
9121 } else if (pm_char_is_whitespace(peek(parser))) {
9122 // If we get here, then we have a $ followed by whitespace,
9123 // which is not allowed.
9124 pm_parser_err_token(parser, &parser->current, PM_ERR_GLOBAL_VARIABLE_BARE);
9125 } else {
9126 // If we get here, then we have a $ followed by something that
9127 // isn't recognized as a global variable.
9128 pm_diagnostic_id_t diag_id = parser->version == PM_OPTIONS_VERSION_CRUBY_3_3 ? PM_ERR_INVALID_VARIABLE_GLOBAL_3_3 : PM_ERR_INVALID_VARIABLE_GLOBAL;
9129 const uint8_t *end = parser->current.end + parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
9130 PM_PARSER_ERR_FORMAT(parser, parser->current.start, end, diag_id, (int) (end - parser->current.start), (const char *) parser->current.start);
9131 }
9132
9134 }
9135 }
9136}
9137
9150static inline pm_token_type_t
9151lex_keyword(pm_parser_t *parser, const uint8_t *current_start, const char *value, size_t vlen, pm_lex_state_t state, pm_token_type_t type, pm_token_type_t modifier_type) {
9152 if (memcmp(current_start, value, vlen) == 0) {
9153 pm_lex_state_t last_state = parser->lex_state;
9154
9155 if (parser->lex_state & PM_LEX_STATE_FNAME) {
9156 lex_state_set(parser, PM_LEX_STATE_ENDFN);
9157 } else {
9158 lex_state_set(parser, state);
9159 if (state == PM_LEX_STATE_BEG) {
9160 parser->command_start = true;
9161 }
9162
9163 if ((modifier_type != PM_TOKEN_EOF) && !(last_state & (PM_LEX_STATE_BEG | PM_LEX_STATE_LABELED | PM_LEX_STATE_CLASS))) {
9164 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
9165 return modifier_type;
9166 }
9167 }
9168
9169 return type;
9170 }
9171
9172 return PM_TOKEN_EOF;
9173}
9174
// Lexes an identifier-like token starting at parser->current.start. After
// consuming identifier characters, it optionally extends the token with a
// trailing '!' or '?' (method name), a trailing '=' in the FNAME state, or a
// trailing ':' (label), then checks the text against the keyword table keyed
// by length. If nothing else matched, the result is PM_TOKEN_CONSTANT or
// PM_TOKEN_IDENTIFIER, depending on whether the first character is uppercase
// in the active encoding.
//
// NOTE(review): the embedded line numbers skip at 9217 -> 9219,
// 9243 -> 9245 and 9248 -> 9250. As shown, the `defined?` check and the
// pm_do_loop_stack_p check have empty bodies, and `type` is used without a
// visible declaration. Those lines appear to be missing from this listing;
// confirm against the original file.
9175static pm_token_type_t
9176lex_identifier(pm_parser_t *parser, bool previous_command_start) {
9177 // Lex as far as we can into the current identifier.
9178 size_t width;
9179 const uint8_t *end = parser->end;
9180 const uint8_t *current_start = parser->current.start;
9181 const uint8_t *current_end = parser->current.end;
9182 bool encoding_changed = parser->encoding_changed;
9183
 // Use the encoding-aware scanner only when the encoding has changed;
 // otherwise use the UTF-8 specific scanner.
9184 if (encoding_changed) {
9185 while (current_end < end && (width = char_is_identifier(parser, current_end)) > 0) {
9186 current_end += width;
9187 }
9188 } else {
9189 while (current_end < end && (width = char_is_identifier_utf8(current_end, end)) > 0) {
9190 current_end += width;
9191 }
9192 }
9193 parser->current.end = current_end;
9194
9195 // Now cache the length of the identifier so that we can quickly compare it
9196 // against known keywords.
9197 width = (size_t) (current_end - current_start);
9198
9199 if (current_end < end) {
 // A '!' or '?' is only taken as part of the name when it is not
 // immediately followed by '=' (so `a!=b` and `a?=b` are not affected).
9200 if (((current_end + 1 >= end) || (current_end[1] != '=')) && (match(parser, '!') || match(parser, '?'))) {
9201 // First we'll attempt to extend the identifier by a ! or ?. Then we'll
9202 // check if we're returning the defined? keyword or just an identifier.
9203 width++;
9204
9205 if (
9206 ((lex_state_p(parser, PM_LEX_STATE_LABEL | PM_LEX_STATE_ENDFN) && !previous_command_start) || lex_state_arg_p(parser)) &&
9207 (peek(parser) == ':') && (peek_offset(parser, 1) != ':')
9208 ) {
9209 // If we're in a position where we can accept a : at the end of an
9210 // identifier, then we'll optionally accept it.
9211 lex_state_set(parser, PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED);
9212 (void) match(parser, ':');
9213 return PM_TOKEN_LABEL;
9214 }
9215
9216 if (parser->lex_state != PM_LEX_STATE_DOT) {
9217 if (width == 8 && (lex_keyword(parser, current_start, "defined?", width, PM_LEX_STATE_ARG, PM_TOKEN_KEYWORD_DEFINED, PM_TOKEN_EOF) != PM_TOKEN_EOF)) {
9219 }
9220 }
9221
9222 return PM_TOKEN_METHOD_NAME;
9223 }
9224
9225 if (lex_state_p(parser, PM_LEX_STATE_FNAME) && peek_offset(parser, 1) != '~' && peek_offset(parser, 1) != '>' && (peek_offset(parser, 1) != '=' || peek_offset(parser, 2) == '>') && match(parser, '=')) {
9226 // If we're in a position where we can accept a = at the end of an
9227 // identifier, then we'll optionally accept it.
9228 return PM_TOKEN_IDENTIFIER;
9229 }
9230
9231 if (
9232 ((lex_state_p(parser, PM_LEX_STATE_LABEL | PM_LEX_STATE_ENDFN) && !previous_command_start) || lex_state_arg_p(parser)) &&
9233 peek(parser) == ':' && peek_offset(parser, 1) != ':'
9234 ) {
9235 // If we're in a position where we can accept a : at the end of an
9236 // identifier, then we'll optionally accept it.
9237 lex_state_set(parser, PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED);
9238 (void) match(parser, ':');
9239 return PM_TOKEN_LABEL;
9240 }
9241 }
9242
 // Keywords are not recognized directly after a '.' (DOT state). The
 // switch on width limits the comparisons to keywords of the same length.
9243 if (parser->lex_state != PM_LEX_STATE_DOT) {
9245 switch (width) {
9246 case 2:
9247 if (lex_keyword(parser, current_start, "do", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_DO, PM_TOKEN_EOF) != PM_TOKEN_EOF) {
9248 if (pm_do_loop_stack_p(parser)) {
9250 }
9251 return PM_TOKEN_KEYWORD_DO;
9252 }
9253
9254 if ((type = lex_keyword(parser, current_start, "if", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_IF, PM_TOKEN_KEYWORD_IF_MODIFIER)) != PM_TOKEN_EOF) return type;
9255 if ((type = lex_keyword(parser, current_start, "in", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_IN, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9256 if ((type = lex_keyword(parser, current_start, "or", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_OR, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9257 break;
9258 case 3:
9259 if ((type = lex_keyword(parser, current_start, "and", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_AND, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9260 if ((type = lex_keyword(parser, current_start, "def", width, PM_LEX_STATE_FNAME, PM_TOKEN_KEYWORD_DEF, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9261 if ((type = lex_keyword(parser, current_start, "end", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_END, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9262 if ((type = lex_keyword(parser, current_start, "END", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_END_UPCASE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9263 if ((type = lex_keyword(parser, current_start, "for", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_FOR, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9264 if ((type = lex_keyword(parser, current_start, "nil", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_NIL, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9265 if ((type = lex_keyword(parser, current_start, "not", width, PM_LEX_STATE_ARG, PM_TOKEN_KEYWORD_NOT, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9266 break;
9267 case 4:
9268 if ((type = lex_keyword(parser, current_start, "case", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_CASE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9269 if ((type = lex_keyword(parser, current_start, "else", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9270 if ((type = lex_keyword(parser, current_start, "next", width, PM_LEX_STATE_MID, PM_TOKEN_KEYWORD_NEXT, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9271 if ((type = lex_keyword(parser, current_start, "redo", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_REDO, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9272 if ((type = lex_keyword(parser, current_start, "self", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_SELF, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9273 if ((type = lex_keyword(parser, current_start, "then", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9274 if ((type = lex_keyword(parser, current_start, "true", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_TRUE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9275 if ((type = lex_keyword(parser, current_start, "when", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_WHEN, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9276 break;
9277 case 5:
9278 if ((type = lex_keyword(parser, current_start, "alias", width, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM, PM_TOKEN_KEYWORD_ALIAS, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9279 if ((type = lex_keyword(parser, current_start, "begin", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_BEGIN, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9280 if ((type = lex_keyword(parser, current_start, "BEGIN", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_BEGIN_UPCASE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9281 if ((type = lex_keyword(parser, current_start, "break", width, PM_LEX_STATE_MID, PM_TOKEN_KEYWORD_BREAK, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9282 if ((type = lex_keyword(parser, current_start, "class", width, PM_LEX_STATE_CLASS, PM_TOKEN_KEYWORD_CLASS, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9283 if ((type = lex_keyword(parser, current_start, "elsif", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_ELSIF, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9284 if ((type = lex_keyword(parser, current_start, "false", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_FALSE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9285 if ((type = lex_keyword(parser, current_start, "retry", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_RETRY, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9286 if ((type = lex_keyword(parser, current_start, "super", width, PM_LEX_STATE_ARG, PM_TOKEN_KEYWORD_SUPER, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9287 if ((type = lex_keyword(parser, current_start, "undef", width, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM, PM_TOKEN_KEYWORD_UNDEF, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9288 if ((type = lex_keyword(parser, current_start, "until", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_UNTIL, PM_TOKEN_KEYWORD_UNTIL_MODIFIER)) != PM_TOKEN_EOF) return type;
9289 if ((type = lex_keyword(parser, current_start, "while", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_WHILE, PM_TOKEN_KEYWORD_WHILE_MODIFIER)) != PM_TOKEN_EOF) return type;
9290 if ((type = lex_keyword(parser, current_start, "yield", width, PM_LEX_STATE_ARG, PM_TOKEN_KEYWORD_YIELD, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9291 break;
9292 case 6:
9293 if ((type = lex_keyword(parser, current_start, "ensure", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9294 if ((type = lex_keyword(parser, current_start, "module", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_MODULE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9295 if ((type = lex_keyword(parser, current_start, "rescue", width, PM_LEX_STATE_MID, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_RESCUE_MODIFIER)) != PM_TOKEN_EOF) return type;
9296 if ((type = lex_keyword(parser, current_start, "return", width, PM_LEX_STATE_MID, PM_TOKEN_KEYWORD_RETURN, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9297 if ((type = lex_keyword(parser, current_start, "unless", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_UNLESS, PM_TOKEN_KEYWORD_UNLESS_MODIFIER)) != PM_TOKEN_EOF) return type;
9298 break;
9299 case 8:
9300 if ((type = lex_keyword(parser, current_start, "__LINE__", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD___LINE__, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9301 if ((type = lex_keyword(parser, current_start, "__FILE__", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD___FILE__, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9302 break;
9303 case 12:
9304 if ((type = lex_keyword(parser, current_start, "__ENCODING__", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD___ENCODING__, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9305 break;
9306 }
9307 }
9308
 // Not a keyword: classify by whether the first character is uppercase,
 // using the same encoding choice as the scan at the top.
9309 if (encoding_changed) {
9310 return parser->encoding->isupper_char(current_start, end - current_start) ? PM_TOKEN_CONSTANT : PM_TOKEN_IDENTIFIER;
9311 }
9312 return pm_encoding_utf_8_isupper_char(current_start, end - current_start) ? PM_TOKEN_CONSTANT : PM_TOKEN_IDENTIFIER;
9313}
9314
9319static bool
9320current_token_starts_line(pm_parser_t *parser) {
9321 return (parser->current.start == parser->start) || (parser->current.start[-1] == '\n');
9322}
9323
// Given `pound`, a pointer to a '#' inside string-like content, decides
// whether it starts an interpolation and updates parser->current.end
// accordingly. The byte after the '#' selects the form:
//
// - '@' : embedded instance or class variable (#@foo, #@@foo)
// - '$' : embedded global variable (#$foo, #$-x, #$1, #$~, ...)
// - '{' : embedded expression (#{...})
//
// For a variable, a PM_LEX_EMBVAR lex mode is pushed and PM_TOKEN_EMBVAR is
// returned with the token ending just after the '#'. For an expression, a
// PM_LEX_EMBEXPR lex mode is pushed and the token ends after the "#{". When
// the '#' does not begin interpolation, PM_TOKEN_NOT_PROVIDED is returned
// with current.end just past the '#', telling the caller to keep lexing.
//
// NOTE(review): the embedded line numbers skip after several of the
// `parser->current.end = ...` assignments (9343 -> 9345, 9354 -> 9356,
// 9368 -> 9370, 9389 -> 9391, 9400 -> 9402, 9419 -> 9421, 9439 -> 9441) and
// at the end of the '{' case (9450 -> 9452). The statements that end those
// branches are not visible in this listing; presumably each is a return.
// Confirm against the original file.
9338static pm_token_type_t
9339lex_interpolation(pm_parser_t *parser, const uint8_t *pound) {
9340 // If there is no content following this #, then we're at the end of
9341 // the string and we can safely return string content.
9342 if (pound + 1 >= parser->end) {
9343 parser->current.end = pound + 1;
9345 }
9346
9347 // Now we'll check against the character that follows the #. If it constitutes
9348 // valid interpolation, we'll handle that, otherwise we'll return
9349 // PM_TOKEN_NOT_PROVIDED.
9350 switch (pound[1]) {
9351 case '@': {
9352 // In this case we may have hit an embedded instance or class variable.
9353 if (pound + 2 >= parser->end) {
9354 parser->current.end = pound + 1;
9356 }
9357
9358 // If we're looking at a @ and there's another @, then we'll skip past the
9359 // second @.
9360 const uint8_t *variable = pound + 2;
9361 if (*variable == '@' && pound + 3 < parser->end) variable++;
9362
9363 if (char_is_identifier_start(parser, variable)) {
9364 // At this point we're sure that we've either hit an embedded instance
9365 // or class variable. In this case we'll first need to check if we've
9366 // already consumed content.
9367 if (pound > parser->current.start) {
9368 parser->current.end = pound;
9370 }
9371
9372 // Otherwise we need to return the embedded variable token
9373 // and then switch to the embedded variable lex mode.
9374 lex_mode_push(parser, (pm_lex_mode_t) { .mode = PM_LEX_EMBVAR });
9375 parser->current.end = pound + 1;
9376 return PM_TOKEN_EMBVAR;
9377 }
9378
9379 // If we didn't get a valid interpolation, then this is just regular
9380 // string content. This is like if we get "#@-". In this case the caller
9381 // should keep lexing.
9382 parser->current.end = pound + 1;
9383 return PM_TOKEN_NOT_PROVIDED;
9384 }
9385 case '$':
9386 // In this case we may have hit an embedded global variable. If there's
9387 // not enough room, then we'll just return string content.
9388 if (pound + 2 >= parser->end) {
9389 parser->current.end = pound + 1;
9391 }
9392
9393 // This is the character that we're going to check to see if it is the
9394 // start of an identifier that would indicate that this is a global
9395 // variable.
9396 const uint8_t *check = pound + 2;
9397
9398 if (pound[2] == '-') {
9399 if (pound + 3 >= parser->end) {
9400 parser->current.end = pound + 2;
9402 }
9403
9404 check++;
9405 }
9406
9407 // If the character that we're going to check is the start of an
9408 // identifier, or we don't have a - and the character is a decimal number
9409 // or a global name punctuation character, then we've hit an embedded
9410 // global variable.
9411 if (
9412 char_is_identifier_start(parser, check) ||
9413 (pound[2] != '-' && (pm_char_is_decimal_digit(pound[2]) || char_is_global_name_punctuation(pound[2])))
9414 ) {
9415 // In this case we've hit an embedded global variable. First check to
9416 // see if we've already consumed content. If we have, then we need to
9417 // return that content as string content first.
9418 if (pound > parser->current.start) {
9419 parser->current.end = pound;
9421 }
9422
9423 // Otherwise, we need to return the embedded variable token and switch
9424 // to the embedded variable lex mode.
9425 lex_mode_push(parser, (pm_lex_mode_t) { .mode = PM_LEX_EMBVAR });
9426 parser->current.end = pound + 1;
9427 return PM_TOKEN_EMBVAR;
9428 }
9429
9430 // In this case we've hit a #$ that does not indicate a global variable.
9431 // In this case we'll continue lexing past it.
9432 parser->current.end = pound + 1;
9433 return PM_TOKEN_NOT_PROVIDED;
9434 case '{':
9435 // In this case it's the start of an embedded expression. If we have
9436 // already consumed content, then we need to return that content as string
9437 // content first.
9438 if (pound > parser->current.start) {
9439 parser->current.end = pound;
9441 }
9442
9443 parser->enclosure_nesting++;
9444
9445 // Otherwise we'll skip past the #{ and begin lexing the embedded
9446 // expression.
9447 lex_mode_push(parser, (pm_lex_mode_t) { .mode = PM_LEX_EMBEXPR });
9448 parser->current.end = pound + 2;
9449 parser->command_start = true;
9450 pm_do_loop_stack_push(parser, false);
9452 default:
9453 // In this case we've hit a # that doesn't constitute interpolation. We'll
9454 // mark that by returning the not provided token type. This tells the
9455 // consumer to keep lexing forward.
9456 parser->current.end = pound + 1;
9457 return PM_TOKEN_NOT_PROVIDED;
9458 }
9459}
9460
/** No modifier applies to the escape sequence being read. */
static const uint8_t PM_ESCAPE_FLAG_NONE = 0x00;

/** The escape sequence is nested inside a control escape (\c or \C-). */
static const uint8_t PM_ESCAPE_FLAG_CONTROL = 0x01;

/** The escape sequence is nested inside a meta escape (\M-). */
static const uint8_t PM_ESCAPE_FLAG_META = 0x02;

/** The escape sequence is being read for a character literal. */
static const uint8_t PM_ESCAPE_FLAG_SINGLE = 0x04;

/**
 * The escape sequence is being read inside a regular expression, so its source
 * form is also written into the regular expression buffer.
 */
static const uint8_t PM_ESCAPE_FLAG_REGEXP = 0x08;
9466
/**
 * Lookup table indexed by an ASCII byte (0x00-0x7F). An entry is true for
 * 0x09-0x0D and for 0x20-0x7E with the exception of the backslash (0x5C);
 * every other entry, including DEL (0x7F), is false.
 */
static const bool ascii_printable_chars[] = {
    /* 0x00 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0,
    /* 0x10 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x20 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x30 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x40 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x50 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1,
    /* 0x60 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x70 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0
};

/**
 * Returns true if the given byte is below 0x80 and is marked in the
 * ascii_printable_chars table. Bytes at or above 0x80 are never looked up.
 */
static inline bool
char_is_ascii_printable(const uint8_t b) {
    if (b >= 0x80) {
        return false;
    }

    return ascii_printable_chars[b];
}
9485
/**
 * Convert a single ASCII hexadecimal digit into its numeric value. Bytes up to
 * '9' are treated as decimal digits; anything larger is treated as a letter,
 * whose low three bits give its offset from 9 ('a'/'A' -> 10 ... 'f'/'F' ->
 * 15). The result is only meaningful when the input is a hexadecimal digit.
 */
static inline uint8_t
escape_hexadecimal_digit(const uint8_t value) {
    if (value <= '9') {
        return (uint8_t) (value - '0');
    }

    return (uint8_t) ((value & 0x7) + 9);
}
9494
9500static inline uint32_t
9501escape_unicode(pm_parser_t *parser, const uint8_t *string, size_t length) {
9502 uint32_t value = 0;
9503 for (size_t index = 0; index < length; index++) {
9504 if (index != 0) value <<= 4;
9505 value |= escape_hexadecimal_digit(string[index]);
9506 }
9507
9508 // Here we're going to verify that the value is actually a valid Unicode
9509 // codepoint and not a surrogate pair.
9510 if (value >= 0xD800 && value <= 0xDFFF) {
9511 pm_parser_err(parser, string, string + length, PM_ERR_ESCAPE_INVALID_UNICODE);
9512 return 0xFFFD;
9513 }
9514
9515 return value;
9516}
9517
9521static inline uint8_t
9522escape_byte(uint8_t value, const uint8_t flags) {
9523 if (flags & PM_ESCAPE_FLAG_CONTROL) value &= 0x9f;
9524 if (flags & PM_ESCAPE_FLAG_META) value |= 0x80;
9525 return value;
9526}
9527
9531static inline void
9532escape_write_unicode(pm_parser_t *parser, pm_buffer_t *buffer, const uint8_t flags, const uint8_t *start, const uint8_t *end, uint32_t value) {
9533 // \u escape sequences in string-like structures implicitly change the
9534 // encoding to UTF-8 if they are >= 0x80 or if they are used in a character
9535 // literal.
9536 if (value >= 0x80 || flags & PM_ESCAPE_FLAG_SINGLE) {
9537 if (parser->explicit_encoding != NULL && parser->explicit_encoding != PM_ENCODING_UTF_8_ENTRY) {
9538 PM_PARSER_ERR_FORMAT(parser, start, end, PM_ERR_MIXED_ENCODING, parser->explicit_encoding->name);
9539 }
9540
9542 }
9543
9544 if (value <= 0x7F) { // 0xxxxxxx
9545 pm_buffer_append_byte(buffer, (uint8_t) value);
9546 } else if (value <= 0x7FF) { // 110xxxxx 10xxxxxx
9547 pm_buffer_append_byte(buffer, (uint8_t) (0xC0 | (value >> 6)));
9548 pm_buffer_append_byte(buffer, (uint8_t) (0x80 | (value & 0x3F)));
9549 } else if (value <= 0xFFFF) { // 1110xxxx 10xxxxxx 10xxxxxx
9550 pm_buffer_append_byte(buffer, (uint8_t) (0xE0 | (value >> 12)));
9551 pm_buffer_append_byte(buffer, (uint8_t) (0x80 | ((value >> 6) & 0x3F)));
9552 pm_buffer_append_byte(buffer, (uint8_t) (0x80 | (value & 0x3F)));
9553 } else if (value <= 0x10FFFF) { // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
9554 pm_buffer_append_byte(buffer, (uint8_t) (0xF0 | (value >> 18)));
9555 pm_buffer_append_byte(buffer, (uint8_t) (0x80 | ((value >> 12) & 0x3F)));
9556 pm_buffer_append_byte(buffer, (uint8_t) (0x80 | ((value >> 6) & 0x3F)));
9557 pm_buffer_append_byte(buffer, (uint8_t) (0x80 | (value & 0x3F)));
9558 } else {
9559 pm_parser_err(parser, start, end, PM_ERR_ESCAPE_INVALID_UNICODE);
9560 pm_buffer_append_byte(buffer, 0xEF);
9561 pm_buffer_append_byte(buffer, 0xBF);
9562 pm_buffer_append_byte(buffer, 0xBD);
9563 }
9564}
9565
9570static inline void
9571escape_write_byte_encoded(pm_parser_t *parser, pm_buffer_t *buffer, uint8_t byte) {
9572 if (byte >= 0x80) {
9573 if (parser->explicit_encoding != NULL && parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY && parser->encoding != PM_ENCODING_UTF_8_ENTRY) {
9574 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_MIXED_ENCODING, parser->encoding->name);
9575 }
9576
9577 parser->explicit_encoding = parser->encoding;
9578 }
9579
9580 pm_buffer_append_byte(buffer, byte);
9581}
9582
9586static inline void
9587escape_write_escape_encoded(pm_parser_t *parser, pm_buffer_t *buffer) {
9588 size_t width;
9589 if (parser->encoding_changed) {
9590 width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
9591 } else {
9592 width = pm_encoding_utf_8_char_width(parser->current.end, parser->end - parser->current.end);
9593 }
9594
9595 // TODO: If the character is invalid in the given encoding, then we'll just
9596 // push one byte into the buffer. This should actually be an error.
9597 width = (width == 0) ? 1 : width;
9598
9599 for (size_t index = 0; index < width; index++) {
9600 escape_write_byte_encoded(parser, buffer, *parser->current.end);
9601 parser->current.end++;
9602 }
9603}
9604
9620static inline void
9621escape_write_byte(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expression_buffer, uint8_t flags, uint8_t byte) {
9622 if (flags & PM_ESCAPE_FLAG_REGEXP) {
9623 pm_buffer_append_format(regular_expression_buffer, "\\x%02X", byte);
9624 }
9625
9626 escape_write_byte_encoded(parser, buffer, byte);
9627}
9628
9634static void
9635escape_read_warn(pm_parser_t *parser, uint8_t flags, uint8_t flag, const char *type) {
9636#define FLAG(value) ((value & PM_ESCAPE_FLAG_CONTROL) ? "\\C-" : (value & PM_ESCAPE_FLAG_META) ? "\\M-" : "")
9637
9638 PM_PARSER_WARN_TOKEN_FORMAT(
9639 parser,
9640 parser->current,
9641 PM_WARN_INVALID_CHARACTER,
9642 FLAG(flags),
9643 FLAG(flag),
9644 type
9645 );
9646
9647#undef FLAG
9648}
9649
9653static void
9654escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expression_buffer, uint8_t flags) {
9655 switch (peek(parser)) {
9656 case '\\': {
9657 parser->current.end++;
9658 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\\', flags));
9659 return;
9660 }
9661 case '\'': {
9662 parser->current.end++;
9663 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\'', flags));
9664 return;
9665 }
9666 case 'a': {
9667 parser->current.end++;
9668 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\a', flags));
9669 return;
9670 }
9671 case 'b': {
9672 parser->current.end++;
9673 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\b', flags));
9674 return;
9675 }
9676 case 'e': {
9677 parser->current.end++;
9678 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\033', flags));
9679 return;
9680 }
9681 case 'f': {
9682 parser->current.end++;
9683 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\f', flags));
9684 return;
9685 }
9686 case 'n': {
9687 parser->current.end++;
9688 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\n', flags));
9689 return;
9690 }
9691 case 'r': {
9692 parser->current.end++;
9693 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\r', flags));
9694 return;
9695 }
9696 case 's': {
9697 parser->current.end++;
9698 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(' ', flags));
9699 return;
9700 }
9701 case 't': {
9702 parser->current.end++;
9703 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\t', flags));
9704 return;
9705 }
9706 case 'v': {
9707 parser->current.end++;
9708 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\v', flags));
9709 return;
9710 }
9711 case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': {
9712 uint8_t value = (uint8_t) (*parser->current.end - '0');
9713 parser->current.end++;
9714
9715 if (pm_char_is_octal_digit(peek(parser))) {
9716 value = ((uint8_t) (value << 3)) | ((uint8_t) (*parser->current.end - '0'));
9717 parser->current.end++;
9718
9719 if (pm_char_is_octal_digit(peek(parser))) {
9720 value = ((uint8_t) (value << 3)) | ((uint8_t) (*parser->current.end - '0'));
9721 parser->current.end++;
9722 }
9723 }
9724
9725 escape_write_byte(parser, buffer, regular_expression_buffer, flags, value);
9726 return;
9727 }
9728 case 'x': {
9729 const uint8_t *start = parser->current.end - 1;
9730
9731 parser->current.end++;
9732 uint8_t byte = peek(parser);
9733
9734 if (pm_char_is_hexadecimal_digit(byte)) {
9735 uint8_t value = escape_hexadecimal_digit(byte);
9736 parser->current.end++;
9737
9738 byte = peek(parser);
9739 if (pm_char_is_hexadecimal_digit(byte)) {
9740 value = (uint8_t) ((value << 4) | escape_hexadecimal_digit(byte));
9741 parser->current.end++;
9742 }
9743
9744 value = escape_byte(value, flags);
9745 if (flags & PM_ESCAPE_FLAG_REGEXP) {
9746 if (flags & (PM_ESCAPE_FLAG_CONTROL | PM_ESCAPE_FLAG_META)) {
9747 pm_buffer_append_format(regular_expression_buffer, "\\x%02X", value);
9748 } else {
9749 pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end - start));
9750 }
9751 }
9752
9753 escape_write_byte_encoded(parser, buffer, value);
9754 } else {
9755 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_HEXADECIMAL);
9756 }
9757
9758 return;
9759 }
9760 case 'u': {
9761 const uint8_t *start = parser->current.end - 1;
9762 parser->current.end++;
9763
9764 if (parser->current.end == parser->end) {
9765 const uint8_t *start = parser->current.end - 2;
9766 PM_PARSER_ERR_FORMAT(parser, start, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE_SHORT, 2, start);
9767 } else if (peek(parser) == '{') {
9768 const uint8_t *unicode_codepoints_start = parser->current.end - 2;
9769 parser->current.end++;
9770
9771 size_t whitespace;
9772 while (true) {
9773 if ((whitespace = pm_strspn_whitespace(parser->current.end, parser->end - parser->current.end)) > 0) {
9774 parser->current.end += whitespace;
9775 } else if (peek(parser) == '\\' && peek_offset(parser, 1) == 'n') {
9776 // This is super hacky, but it gets us nicer error
9777 // messages because we can still pass it off to the
9778 // regular expression engine even if we hit an
9779 // unterminated regular expression.
9780 parser->current.end += 2;
9781 } else {
9782 break;
9783 }
9784 }
9785
9786 const uint8_t *extra_codepoints_start = NULL;
9787 int codepoints_count = 0;
9788
9789 while ((parser->current.end < parser->end) && (*parser->current.end != '}')) {
9790 const uint8_t *unicode_start = parser->current.end;
9791 size_t hexadecimal_length = pm_strspn_hexadecimal_digit(parser->current.end, parser->end - parser->current.end);
9792
9793 if (hexadecimal_length > 6) {
9794 // \u{nnnn} character literal allows only 1-6 hexadecimal digits
9795 pm_parser_err(parser, unicode_start, unicode_start + hexadecimal_length, PM_ERR_ESCAPE_INVALID_UNICODE_LONG);
9796 } else if (hexadecimal_length == 0) {
9797 // there are not hexadecimal characters
9798
9799 if (flags & PM_ESCAPE_FLAG_REGEXP) {
9800 // If this is a regular expression, we are going to
9801 // let the regular expression engine handle this
9802 // error instead of us.
9803 pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end - start));
9804 } else {
9805 pm_parser_err(parser, parser->current.end, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE);
9806 pm_parser_err(parser, parser->current.end, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE_TERM);
9807 }
9808
9809 return;
9810 }
9811
9812 parser->current.end += hexadecimal_length;
9813 codepoints_count++;
9814 if (flags & PM_ESCAPE_FLAG_SINGLE && codepoints_count == 2) {
9815 extra_codepoints_start = unicode_start;
9816 }
9817
9818 uint32_t value = escape_unicode(parser, unicode_start, hexadecimal_length);
9819 escape_write_unicode(parser, buffer, flags, unicode_start, parser->current.end, value);
9820
9821 parser->current.end += pm_strspn_whitespace(parser->current.end, parser->end - parser->current.end);
9822 }
9823
9824 // ?\u{nnnn} character literal should contain only one codepoint
9825 // and cannot be like ?\u{nnnn mmmm}.
9826 if (flags & PM_ESCAPE_FLAG_SINGLE && codepoints_count > 1) {
9827 pm_parser_err(parser, extra_codepoints_start, parser->current.end - 1, PM_ERR_ESCAPE_INVALID_UNICODE_LITERAL);
9828 }
9829
9830 if (parser->current.end == parser->end) {
9831 PM_PARSER_ERR_FORMAT(parser, start, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE_LIST, (int) (parser->current.end - start), start);
9832 } else if (peek(parser) == '}') {
9833 parser->current.end++;
9834 } else {
9835 if (flags & PM_ESCAPE_FLAG_REGEXP) {
9836 // If this is a regular expression, we are going to let
9837 // the regular expression engine handle this error
9838 // instead of us.
9839 pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end - start));
9840 } else {
9841 pm_parser_err(parser, unicode_codepoints_start, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE_TERM);
9842 }
9843 }
9844
9845 if (flags & PM_ESCAPE_FLAG_REGEXP) {
9846 pm_buffer_append_bytes(regular_expression_buffer, unicode_codepoints_start, (size_t) (parser->current.end - unicode_codepoints_start));
9847 }
9848 } else {
9849 size_t length = pm_strspn_hexadecimal_digit(parser->current.end, MIN(parser->end - parser->current.end, 4));
9850
9851 if (length == 0) {
9852 if (flags & PM_ESCAPE_FLAG_REGEXP) {
9853 pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end - start));
9854 } else {
9855 const uint8_t *start = parser->current.end - 2;
9856 PM_PARSER_ERR_FORMAT(parser, start, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE_SHORT, 2, start);
9857 }
9858 } else if (length == 4) {
9859 uint32_t value = escape_unicode(parser, parser->current.end, 4);
9860
9861 if (flags & PM_ESCAPE_FLAG_REGEXP) {
9862 pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end + 4 - start));
9863 }
9864
9865 escape_write_unicode(parser, buffer, flags, start, parser->current.end + 4, value);
9866 parser->current.end += 4;
9867 } else {
9868 parser->current.end += length;
9869
9870 if (flags & PM_ESCAPE_FLAG_REGEXP) {
9871 // If this is a regular expression, we are going to let
9872 // the regular expression engine handle this error
9873 // instead of us.
9874 pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end - start));
9875 } else {
9876 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_UNICODE);
9877 }
9878 }
9879 }
9880
9881 return;
9882 }
9883 case 'c': {
9884 parser->current.end++;
9885 if (flags & PM_ESCAPE_FLAG_CONTROL) {
9886 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL_REPEAT);
9887 }
9888
9889 if (parser->current.end == parser->end) {
9890 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL);
9891 return;
9892 }
9893
9894 uint8_t peeked = peek(parser);
9895 switch (peeked) {
9896 case '?': {
9897 parser->current.end++;
9898 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(0x7f, flags));
9899 return;
9900 }
9901 case '\\':
9902 parser->current.end++;
9903
9904 if (match(parser, 'u') || match(parser, 'U')) {
9905 pm_parser_err(parser, parser->current.start, parser->current.end, PM_ERR_INVALID_ESCAPE_CHARACTER);
9906 return;
9907 }
9908
9909 escape_read(parser, buffer, regular_expression_buffer, flags | PM_ESCAPE_FLAG_CONTROL);
9910 return;
9911 case ' ':
9912 parser->current.end++;
9913 escape_read_warn(parser, flags, PM_ESCAPE_FLAG_CONTROL, "\\s");
9914 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
9915 return;
9916 case '\t':
9917 parser->current.end++;
9918 escape_read_warn(parser, flags, 0, "\\t");
9919 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
9920 return;
9921 default: {
9922 if (!char_is_ascii_printable(peeked)) {
9923 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL);
9924 return;
9925 }
9926
9927 parser->current.end++;
9928 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
9929 return;
9930 }
9931 }
9932 }
9933 case 'C': {
9934 parser->current.end++;
9935 if (flags & PM_ESCAPE_FLAG_CONTROL) {
9936 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL_REPEAT);
9937 }
9938
9939 if (peek(parser) != '-') {
9940 size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
9941 pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_CONTROL);
9942 return;
9943 }
9944
9945 parser->current.end++;
9946 if (parser->current.end == parser->end) {
9947 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL);
9948 return;
9949 }
9950
9951 uint8_t peeked = peek(parser);
9952 switch (peeked) {
9953 case '?': {
9954 parser->current.end++;
9955 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(0x7f, flags));
9956 return;
9957 }
9958 case '\\':
9959 parser->current.end++;
9960
9961 if (match(parser, 'u') || match(parser, 'U')) {
9962 pm_parser_err(parser, parser->current.start, parser->current.end, PM_ERR_INVALID_ESCAPE_CHARACTER);
9963 return;
9964 }
9965
9966 escape_read(parser, buffer, regular_expression_buffer, flags | PM_ESCAPE_FLAG_CONTROL);
9967 return;
9968 case ' ':
9969 parser->current.end++;
9970 escape_read_warn(parser, flags, PM_ESCAPE_FLAG_CONTROL, "\\s");
9971 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
9972 return;
9973 case '\t':
9974 parser->current.end++;
9975 escape_read_warn(parser, flags, 0, "\\t");
9976 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
9977 return;
9978 default: {
9979 if (!char_is_ascii_printable(peeked)) {
9980 size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
9981 pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_CONTROL);
9982 return;
9983 }
9984
9985 parser->current.end++;
9986 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
9987 return;
9988 }
9989 }
9990 }
9991 case 'M': {
9992 parser->current.end++;
9993 if (flags & PM_ESCAPE_FLAG_META) {
9994 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_META_REPEAT);
9995 }
9996
9997 if (peek(parser) != '-') {
9998 size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
9999 pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_META);
10000 return;
10001 }
10002
10003 parser->current.end++;
10004 if (parser->current.end == parser->end) {
10005 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_META);
10006 return;
10007 }
10008
10009 uint8_t peeked = peek(parser);
10010 switch (peeked) {
10011 case '\\':
10012 parser->current.end++;
10013
10014 if (match(parser, 'u') || match(parser, 'U')) {
10015 pm_parser_err(parser, parser->current.start, parser->current.end, PM_ERR_INVALID_ESCAPE_CHARACTER);
10016 return;
10017 }
10018
10019 escape_read(parser, buffer, regular_expression_buffer, flags | PM_ESCAPE_FLAG_META);
10020 return;
10021 case ' ':
10022 parser->current.end++;
10023 escape_read_warn(parser, flags, PM_ESCAPE_FLAG_META, "\\s");
10024 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_META));
10025 return;
10026 case '\t':
10027 parser->current.end++;
10028 escape_read_warn(parser, flags & ((uint8_t) ~PM_ESCAPE_FLAG_CONTROL), PM_ESCAPE_FLAG_META, "\\t");
10029 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_META));
10030 return;
10031 default:
10032 if (!char_is_ascii_printable(peeked)) {
10033 size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
10034 pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_META);
10035 return;
10036 }
10037
10038 parser->current.end++;
10039 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_META));
10040 return;
10041 }
10042 }
10043 case '\r': {
10044 if (peek_offset(parser, 1) == '\n') {
10045 parser->current.end += 2;
10046 escape_write_byte_encoded(parser, buffer, escape_byte('\n', flags));
10047 return;
10048 }
10049 }
10050 /* fallthrough */
10051 default: {
10052 if (parser->current.end < parser->end) {
10053 escape_write_escape_encoded(parser, buffer);
10054 } else {
10055 pm_parser_err_current(parser, PM_ERR_INVALID_ESCAPE_CHARACTER);
10056 }
10057 return;
10058 }
10059 }
10060}
10061
10087static pm_token_type_t
10088lex_question_mark(pm_parser_t *parser) {
10089 if (lex_state_end_p(parser)) {
10090 lex_state_set(parser, PM_LEX_STATE_BEG);
10092 }
10093
10094 if (parser->current.end >= parser->end) {
10095 pm_parser_err_current(parser, PM_ERR_INCOMPLETE_QUESTION_MARK);
10096 pm_string_shared_init(&parser->current_string, parser->current.start + 1, parser->current.end);
10098 }
10099
10100 if (pm_char_is_whitespace(*parser->current.end)) {
10101 lex_state_set(parser, PM_LEX_STATE_BEG);
10103 }
10104
10105 lex_state_set(parser, PM_LEX_STATE_BEG);
10106
10107 if (match(parser, '\\')) {
10108 lex_state_set(parser, PM_LEX_STATE_END);
10109
10110 pm_buffer_t buffer;
10111 pm_buffer_init_capacity(&buffer, 3);
10112
10113 escape_read(parser, &buffer, NULL, PM_ESCAPE_FLAG_SINGLE);
10114 pm_string_owned_init(&parser->current_string, (uint8_t *) buffer.value, buffer.length);
10115
10117 } else {
10118 size_t encoding_width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
10119
10120 // Ternary operators can have a ? immediately followed by an identifier
10121 // which starts with an underscore. We check for this case here.
10122 if (
10123 !(parser->encoding->alnum_char(parser->current.end, parser->end - parser->current.end) || peek(parser) == '_') ||
10124 (
10125 (parser->current.end + encoding_width >= parser->end) ||
10126 !char_is_identifier(parser, parser->current.end + encoding_width)
10127 )
10128 ) {
10129 lex_state_set(parser, PM_LEX_STATE_END);
10130 parser->current.end += encoding_width;
10131 pm_string_shared_init(&parser->current_string, parser->current.start + 1, parser->current.end);
10133 }
10134 }
10135
10137}
10138
10143static pm_token_type_t
10144lex_at_variable(pm_parser_t *parser) {
10146 size_t width;
10147
10148 if (parser->current.end < parser->end && (width = char_is_identifier_start(parser, parser->current.end)) > 0) {
10149 parser->current.end += width;
10150
10151 while (parser->current.end < parser->end && (width = char_is_identifier(parser, parser->current.end)) > 0) {
10152 parser->current.end += width;
10153 }
10154 } else if (parser->current.end < parser->end && pm_char_is_decimal_digit(*parser->current.end)) {
10155 pm_diagnostic_id_t diag_id = (type == PM_TOKEN_CLASS_VARIABLE) ? PM_ERR_INCOMPLETE_VARIABLE_CLASS : PM_ERR_INCOMPLETE_VARIABLE_INSTANCE;
10156 if (parser->version == PM_OPTIONS_VERSION_CRUBY_3_3) {
10157 diag_id = (type == PM_TOKEN_CLASS_VARIABLE) ? PM_ERR_INCOMPLETE_VARIABLE_CLASS_3_3 : PM_ERR_INCOMPLETE_VARIABLE_INSTANCE_3_3;
10158 }
10159
10160 size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
10161 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, diag_id, (int) ((parser->current.end + width) - parser->current.start), (const char *) parser->current.start);
10162 } else {
10163 pm_diagnostic_id_t diag_id = (type == PM_TOKEN_CLASS_VARIABLE) ? PM_ERR_CLASS_VARIABLE_BARE : PM_ERR_INSTANCE_VARIABLE_BARE;
10164 pm_parser_err_token(parser, &parser->current, diag_id);
10165 }
10166
10167 // If we're lexing an embedded variable, then we need to pop back into the
10168 // parent lex context.
10169 if (parser->lex_modes.current->mode == PM_LEX_EMBVAR) {
10170 lex_mode_pop(parser);
10171 }
10172
10173 return type;
10174}
10175
10179static inline void
10180parser_lex_callback(pm_parser_t *parser) {
10181 if (parser->lex_callback) {
10182 parser->lex_callback->callback(parser->lex_callback->data, parser, &parser->current);
10183 }
10184}
10185
10189static inline pm_comment_t *
10190parser_comment(pm_parser_t *parser, pm_comment_type_t type) {
10191 pm_comment_t *comment = (pm_comment_t *) xcalloc(1, sizeof(pm_comment_t));
10192 if (comment == NULL) return NULL;
10193
10194 *comment = (pm_comment_t) {
10195 .type = type,
10196 .location = { parser->current.start, parser->current.end }
10197 };
10198
10199 return comment;
10200}
10201
10207static pm_token_type_t
10208lex_embdoc(pm_parser_t *parser) {
10209 // First, lex out the EMBDOC_BEGIN token.
10210 const uint8_t *newline = next_newline(parser->current.end, parser->end - parser->current.end);
10211
10212 if (newline == NULL) {
10213 parser->current.end = parser->end;
10214 } else {
10215 pm_newline_list_append(&parser->newline_list, newline);
10216 parser->current.end = newline + 1;
10217 }
10218
10219 parser->current.type = PM_TOKEN_EMBDOC_BEGIN;
10220 parser_lex_callback(parser);
10221
10222 // Now, create a comment that is going to be attached to the parser.
10223 pm_comment_t *comment = parser_comment(parser, PM_COMMENT_EMBDOC);
10224 if (comment == NULL) return PM_TOKEN_EOF;
10225
10226 // Now, loop until we find the end of the embedded documentation or the end
10227 // of the file.
10228 while (parser->current.end + 4 <= parser->end) {
10229 parser->current.start = parser->current.end;
10230
10231 // If we've hit the end of the embedded documentation then we'll return
10232 // that token here.
10233 if (
10234 (memcmp(parser->current.end, "=end", 4) == 0) &&
10235 (
10236 (parser->current.end + 4 == parser->end) || // end of file
10237 pm_char_is_whitespace(parser->current.end[4]) || // whitespace
10238 (parser->current.end[4] == '\0') || // NUL or end of script
10239 (parser->current.end[4] == '\004') || // ^D
10240 (parser->current.end[4] == '\032') // ^Z
10241 )
10242 ) {
10243 const uint8_t *newline = next_newline(parser->current.end, parser->end - parser->current.end);
10244
10245 if (newline == NULL) {
10246 parser->current.end = parser->end;
10247 } else {
10248 pm_newline_list_append(&parser->newline_list, newline);
10249 parser->current.end = newline + 1;
10250 }
10251
10252 parser->current.type = PM_TOKEN_EMBDOC_END;
10253 parser_lex_callback(parser);
10254
10255 comment->location.end = parser->current.end;
10256 pm_list_append(&parser->comment_list, (pm_list_node_t *) comment);
10257
10258 return PM_TOKEN_EMBDOC_END;
10259 }
10260
10261 // Otherwise, we'll parse until the end of the line and return a line of
10262 // embedded documentation.
10263 const uint8_t *newline = next_newline(parser->current.end, parser->end - parser->current.end);
10264
10265 if (newline == NULL) {
10266 parser->current.end = parser->end;
10267 } else {
10268 pm_newline_list_append(&parser->newline_list, newline);
10269 parser->current.end = newline + 1;
10270 }
10271
10272 parser->current.type = PM_TOKEN_EMBDOC_LINE;
10273 parser_lex_callback(parser);
10274 }
10275
10276 pm_parser_err_current(parser, PM_ERR_EMBDOC_TERM);
10277
10278 comment->location.end = parser->current.end;
10279 pm_list_append(&parser->comment_list, (pm_list_node_t *) comment);
10280
10281 return PM_TOKEN_EOF;
10282}
10283
10289static inline void
10290parser_lex_ignored_newline(pm_parser_t *parser) {
10291 parser->current.type = PM_TOKEN_IGNORED_NEWLINE;
10292 parser_lex_callback(parser);
10293}
10294
10304static inline void
10305parser_flush_heredoc_end(pm_parser_t *parser) {
10306 assert(parser->heredoc_end <= parser->end);
10307 parser->next_start = parser->heredoc_end;
10308 parser->heredoc_end = NULL;
10309}
10310
10314static bool
10315parser_end_of_line_p(const pm_parser_t *parser) {
10316 const uint8_t *cursor = parser->current.end;
10317
10318 while (cursor < parser->end && *cursor != '\n' && *cursor != '#') {
10319 if (!pm_char_is_inline_whitespace(*cursor++)) return false;
10320 }
10321
10322 return true;
10323}
10324
10343typedef struct {
10349
10354 const uint8_t *cursor;
10356
10376
10380static inline void
10381pm_token_buffer_push_byte(pm_token_buffer_t *token_buffer, uint8_t byte) {
10382 pm_buffer_append_byte(&token_buffer->buffer, byte);
10383}
10384
10385static inline void
10386pm_regexp_token_buffer_push_byte(pm_regexp_token_buffer_t *token_buffer, uint8_t byte) {
10387 pm_buffer_append_byte(&token_buffer->regexp_buffer, byte);
10388}
10389
10393static inline size_t
10394parser_char_width(const pm_parser_t *parser) {
10395 size_t width;
10396 if (parser->encoding_changed) {
10397 width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
10398 } else {
10399 width = pm_encoding_utf_8_char_width(parser->current.end, parser->end - parser->current.end);
10400 }
10401
10402 // TODO: If the character is invalid in the given encoding, then we'll just
10403 // push one byte into the buffer. This should actually be an error.
10404 return (width == 0 ? 1 : width);
10405}
10406
10410static void
10411pm_token_buffer_push_escaped(pm_token_buffer_t *token_buffer, pm_parser_t *parser) {
10412 size_t width = parser_char_width(parser);
10413 pm_buffer_append_bytes(&token_buffer->buffer, parser->current.end, width);
10414 parser->current.end += width;
10415}
10416
10417static void
10418pm_regexp_token_buffer_push_escaped(pm_regexp_token_buffer_t *token_buffer, pm_parser_t *parser) {
10419 size_t width = parser_char_width(parser);
10420 pm_buffer_append_bytes(&token_buffer->base.buffer, parser->current.end, width);
10421 pm_buffer_append_bytes(&token_buffer->regexp_buffer, parser->current.end, width);
10422 parser->current.end += width;
10423}
10424
/**
 * Returns true if none of the `length` bytes starting at `value` has its high
 * bit set. An empty slice is considered ASCII-only.
 */
static bool
pm_slice_ascii_only_p(const uint8_t *value, size_t length) {
    size_t remaining = length;

    while (remaining > 0) {
        if ((*value & 0x80) != 0) {
            return false;
        }

        value++;
        remaining--;
    }

    return true;
}
10433
10440static inline void
10441pm_token_buffer_copy(pm_parser_t *parser, pm_token_buffer_t *token_buffer) {
10442 pm_string_owned_init(&parser->current_string, (uint8_t *) pm_buffer_value(&token_buffer->buffer), pm_buffer_length(&token_buffer->buffer));
10443}
10444
10445static inline void
10446pm_regexp_token_buffer_copy(pm_parser_t *parser, pm_regexp_token_buffer_t *token_buffer) {
10447 pm_string_owned_init(&parser->current_string, (uint8_t *) pm_buffer_value(&token_buffer->base.buffer), pm_buffer_length(&token_buffer->base.buffer));
10448 parser->current_regular_expression_ascii_only = pm_slice_ascii_only_p((const uint8_t *) pm_buffer_value(&token_buffer->regexp_buffer), pm_buffer_length(&token_buffer->regexp_buffer));
10449 pm_buffer_free(&token_buffer->regexp_buffer);
10450}
10451
10461static void
10462pm_token_buffer_flush(pm_parser_t *parser, pm_token_buffer_t *token_buffer) {
10463 if (token_buffer->cursor == NULL) {
10464 pm_string_shared_init(&parser->current_string, parser->current.start, parser->current.end);
10465 } else {
10466 pm_buffer_append_bytes(&token_buffer->buffer, token_buffer->cursor, (size_t) (parser->current.end - token_buffer->cursor));
10467 pm_token_buffer_copy(parser, token_buffer);
10468 }
10469}
10470
10471static void
10472pm_regexp_token_buffer_flush(pm_parser_t *parser, pm_regexp_token_buffer_t *token_buffer) {
10473 if (token_buffer->base.cursor == NULL) {
10474 pm_string_shared_init(&parser->current_string, parser->current.start, parser->current.end);
10475 parser->current_regular_expression_ascii_only = pm_slice_ascii_only_p(parser->current.start, (size_t) (parser->current.end - parser->current.start));
10476 } else {
10477 pm_buffer_append_bytes(&token_buffer->base.buffer, token_buffer->base.cursor, (size_t) (parser->current.end - token_buffer->base.cursor));
10478 pm_buffer_append_bytes(&token_buffer->regexp_buffer, token_buffer->base.cursor, (size_t) (parser->current.end - token_buffer->base.cursor));
10479 pm_regexp_token_buffer_copy(parser, token_buffer);
10480 }
10481}
10482
/**
 * The capacity passed to pm_buffer_init_capacity when one of the token buffer
 * escape helpers has to initialize a buffer for the first time.
 */
#define PM_TOKEN_BUFFER_DEFAULT_SIZE 16
10484
10493static void
10494pm_token_buffer_escape(pm_parser_t *parser, pm_token_buffer_t *token_buffer) {
10495 const uint8_t *start;
10496 if (token_buffer->cursor == NULL) {
10497 pm_buffer_init_capacity(&token_buffer->buffer, PM_TOKEN_BUFFER_DEFAULT_SIZE);
10498 start = parser->current.start;
10499 } else {
10500 start = token_buffer->cursor;
10501 }
10502
10503 const uint8_t *end = parser->current.end - 1;
10504 assert(end >= start);
10505 pm_buffer_append_bytes(&token_buffer->buffer, start, (size_t) (end - start));
10506
10507 token_buffer->cursor = end;
10508}
10509
10510static void
10511pm_regexp_token_buffer_escape(pm_parser_t *parser, pm_regexp_token_buffer_t *token_buffer) {
10512 const uint8_t *start;
10513 if (token_buffer->base.cursor == NULL) {
10514 pm_buffer_init_capacity(&token_buffer->base.buffer, PM_TOKEN_BUFFER_DEFAULT_SIZE);
10515 pm_buffer_init_capacity(&token_buffer->regexp_buffer, PM_TOKEN_BUFFER_DEFAULT_SIZE);
10516 start = parser->current.start;
10517 } else {
10518 start = token_buffer->base.cursor;
10519 }
10520
10521 const uint8_t *end = parser->current.end - 1;
10522 pm_buffer_append_bytes(&token_buffer->base.buffer, start, (size_t) (end - start));
10523 pm_buffer_append_bytes(&token_buffer->regexp_buffer, start, (size_t) (end - start));
10524
10525 token_buffer->base.cursor = end;
10526}
10527
10528#undef PM_TOKEN_BUFFER_DEFAULT_SIZE
10529
10534static inline size_t
10535pm_heredoc_strspn_inline_whitespace(pm_parser_t *parser, const uint8_t **cursor, pm_heredoc_indent_t indent) {
10536 size_t whitespace = 0;
10537
10538 switch (indent) {
10539 case PM_HEREDOC_INDENT_NONE:
10540 // Do nothing, we can't match a terminator with
10541 // indentation and there's no need to calculate common
10542 // whitespace.
10543 break;
10544 case PM_HEREDOC_INDENT_DASH:
10545 // Skip past inline whitespace.
10546 *cursor += pm_strspn_inline_whitespace(*cursor, parser->end - *cursor);
10547 break;
10548 case PM_HEREDOC_INDENT_TILDE:
10549 // Skip past inline whitespace and calculate common
10550 // whitespace.
10551 while (*cursor < parser->end && pm_char_is_inline_whitespace(**cursor)) {
10552 if (**cursor == '\t') {
10553 whitespace = (whitespace / PM_TAB_WHITESPACE_SIZE + 1) * PM_TAB_WHITESPACE_SIZE;
10554 } else {
10555 whitespace++;
10556 }
10557 (*cursor)++;
10558 }
10559
10560 break;
10561 }
10562
10563 return whitespace;
10564}
10565
10570static uint8_t
10571pm_lex_percent_delimiter(pm_parser_t *parser) {
10572 size_t eol_length = match_eol(parser);
10573
10574 if (eol_length) {
10575 if (parser->heredoc_end) {
10576 // If we have already lexed a heredoc, then the newline has already
10577 // been added to the list. In this case we want to just flush the
10578 // heredoc end.
10579 parser_flush_heredoc_end(parser);
10580 } else {
10581 // Otherwise, we'll add the newline to the list of newlines.
10582 pm_newline_list_append(&parser->newline_list, parser->current.end + eol_length - 1);
10583 }
10584
10585 uint8_t delimiter = *parser->current.end;
10586
10587 // If our delimiter is \r\n, we want to treat it as if it's \n.
10588 // For example, %\r\nfoo\r\n should be "foo"
10589 if (eol_length == 2) {
10590 delimiter = *(parser->current.end + 1);
10591 }
10592
10593 parser->current.end += eol_length;
10594 return delimiter;
10595 }
10596
10597 return *parser->current.end++;
10598}
10599
/**
 * Convenience macro for use inside parser_lex: it stores the given token type
 * on parser->current, invokes parser_lex_callback, and then returns from the
 * enclosing function. The expansion is a sequence of statements that ends in
 * a bare `return`, so it must be used as a complete statement (`LEX(type);`)
 * inside a braced block -- never as the unbraced body of an `if`/`else`.
 */
#define LEX(token_type) parser->current.type = token_type; parser_lex_callback(parser); return
10605
10612static void
10613parser_lex(pm_parser_t *parser) {
10614 assert(parser->current.end <= parser->end);
10615 parser->previous = parser->current;
10616
10617 // This value mirrors cmd_state from CRuby.
10618 bool previous_command_start = parser->command_start;
10619 parser->command_start = false;
10620
10621 // This is used to communicate to the newline lexing function that we've
10622 // already seen a comment.
10623 bool lexed_comment = false;
10624
10625 // Here we cache the current value of the semantic token seen flag. This is
10626 // used to reset it in case we find a token that shouldn't flip this flag.
10627 unsigned int semantic_token_seen = parser->semantic_token_seen;
10628 parser->semantic_token_seen = true;
10629
10630 switch (parser->lex_modes.current->mode) {
10631 case PM_LEX_DEFAULT:
10632 case PM_LEX_EMBEXPR:
10633 case PM_LEX_EMBVAR:
10634
10635 // We have a specific named label here because we are going to jump back to
10636 // this location in the event that we have lexed a token that should not be
10637 // returned to the parser. This includes comments, ignored newlines, and
10638 // invalid tokens of some form.
10639 lex_next_token: {
10640 // If we have the special next_start pointer set, then we're going to jump
10641 // to that location and start lexing from there.
10642 if (parser->next_start != NULL) {
10643 parser->current.end = parser->next_start;
10644 parser->next_start = NULL;
10645 }
10646
10647 // This value mirrors space_seen from CRuby. It tracks whether or not
10648 // space has been eaten before the start of the next token.
10649 bool space_seen = false;
10650
10651 // First, we're going to skip past any whitespace at the front of the next
10652 // token.
10653 bool chomping = true;
10654 while (parser->current.end < parser->end && chomping) {
10655 switch (*parser->current.end) {
10656 case ' ':
10657 case '\t':
10658 case '\f':
10659 case '\v':
10660 parser->current.end++;
10661 space_seen = true;
10662 break;
10663 case '\r':
10664 if (match_eol_offset(parser, 1)) {
10665 chomping = false;
10666 } else {
10667 pm_parser_warn(parser, parser->current.end, parser->current.end + 1, PM_WARN_UNEXPECTED_CARRIAGE_RETURN);
10668 parser->current.end++;
10669 space_seen = true;
10670 }
10671 break;
10672 case '\\': {
10673 size_t eol_length = match_eol_offset(parser, 1);
10674 if (eol_length) {
10675 if (parser->heredoc_end) {
10676 parser->current.end = parser->heredoc_end;
10677 parser->heredoc_end = NULL;
10678 } else {
10679 parser->current.end += eol_length + 1;
10680 pm_newline_list_append(&parser->newline_list, parser->current.end - 1);
10681 space_seen = true;
10682 }
10683 } else if (pm_char_is_inline_whitespace(*parser->current.end)) {
10684 parser->current.end += 2;
10685 } else {
10686 chomping = false;
10687 }
10688
10689 break;
10690 }
10691 default:
10692 chomping = false;
10693 break;
10694 }
10695 }
10696
10697 // Next, we'll set to start of this token to be the current end.
10698 parser->current.start = parser->current.end;
10699
10700 // We'll check if we're at the end of the file. If we are, then we
10701 // need to return the EOF token.
10702 if (parser->current.end >= parser->end) {
10703 // If we hit EOF, but the EOF came immediately after a newline,
10704 // set the start of the token to the newline. This way any EOF
10705 // errors will be reported as happening on that line rather than
10706 // a line after. For example "foo(\n" should report an error
10707 // on line 1 even though EOF technically occurs on line 2.
10708 if (parser->current.start > parser->start && (*(parser->current.start - 1) == '\n')) {
10709 parser->current.start -= 1;
10710 }
10711 LEX(PM_TOKEN_EOF);
10712 }
10713
10714 // Finally, we'll check the current character to determine the next
10715 // token.
10716 switch (*parser->current.end++) {
10717 case '\0': // NUL or end of script
10718 case '\004': // ^D
10719 case '\032': // ^Z
10720 parser->current.end--;
10721 LEX(PM_TOKEN_EOF);
10722
10723 case '#': { // comments
10724 const uint8_t *ending = next_newline(parser->current.end, parser->end - parser->current.end);
10725 parser->current.end = ending == NULL ? parser->end : ending;
10726
10727 // If we found a comment while lexing, then we're going to
10728 // add it to the list of comments in the file and keep
10729 // lexing.
10730 pm_comment_t *comment = parser_comment(parser, PM_COMMENT_INLINE);
10731 pm_list_append(&parser->comment_list, (pm_list_node_t *) comment);
10732
10733 if (ending) parser->current.end++;
10734 parser->current.type = PM_TOKEN_COMMENT;
10735 parser_lex_callback(parser);
10736
10737 // Here, parse the comment to see if it's a magic comment
10738 // and potentially change state on the parser.
10739 if (!parser_lex_magic_comment(parser, semantic_token_seen) && (parser->current.start == parser->encoding_comment_start)) {
10740 ptrdiff_t length = parser->current.end - parser->current.start;
10741
10742 // If we didn't find a magic comment within the first
10743 // pass and we're at the start of the file, then we need
10744 // to do another pass to potentially find other patterns
10745 // for encoding comments.
10746 if (length >= 10 && !parser->encoding_locked) {
10747 parser_lex_magic_comment_encoding(parser);
10748 }
10749 }
10750
10751 lexed_comment = true;
10752 }
10753 /* fallthrough */
10754 case '\r':
10755 case '\n': {
10756 parser->semantic_token_seen = semantic_token_seen & 0x1;
10757 size_t eol_length = match_eol_at(parser, parser->current.end - 1);
10758
10759 if (eol_length) {
10760 // The only way you can have carriage returns in this
10761 // particular loop is if you have a carriage return
10762 // followed by a newline. In that case we'll just skip
10763 // over the carriage return and continue lexing, in
10764 // order to make it so that the newline token
10765 // encapsulates both the carriage return and the
10766 // newline. Note that we need to check that we haven't
10767 // already lexed a comment here because that falls
10768 // through into here as well.
10769 if (!lexed_comment) {
10770 parser->current.end += eol_length - 1; // skip CR
10771 }
10772
10773 if (parser->heredoc_end == NULL) {
10774 pm_newline_list_append(&parser->newline_list, parser->current.end - 1);
10775 }
10776 }
10777
10778 if (parser->heredoc_end) {
10779 parser_flush_heredoc_end(parser);
10780 }
10781
10782 // If this is an ignored newline, then we can continue lexing after
10783 // calling the callback with the ignored newline token.
10784 switch (lex_state_ignored_p(parser)) {
10785 case PM_IGNORED_NEWLINE_NONE:
10786 break;
10787 case PM_IGNORED_NEWLINE_PATTERN:
10788 if (parser->pattern_matching_newlines || parser->in_keyword_arg) {
10789 if (!lexed_comment) parser_lex_ignored_newline(parser);
10790 lex_state_set(parser, PM_LEX_STATE_BEG);
10791 parser->command_start = true;
10792 parser->current.type = PM_TOKEN_NEWLINE;
10793 return;
10794 }
10795 /* fallthrough */
10796 case PM_IGNORED_NEWLINE_ALL:
10797 if (!lexed_comment) parser_lex_ignored_newline(parser);
10798 lexed_comment = false;
10799 goto lex_next_token;
10800 }
10801
10802 // Here we need to look ahead and see if there is a call operator
10803 // (either . or &.) that starts the next line. If there is, then this
10804 // is going to become an ignored newline and we're going to instead
10805 // return the call operator.
10806 const uint8_t *next_content = parser->next_start == NULL ? parser->current.end : parser->next_start;
10807 next_content += pm_strspn_inline_whitespace(next_content, parser->end - next_content);
10808
10809 if (next_content < parser->end) {
10810 // If we hit a comment after a newline, then we're going to check
10811 // if it's ignored or if it's followed by a method call ('.').
10812 // If it is, then we're going to call the
10813 // callback with an ignored newline and then continue lexing.
10814 // Otherwise we'll return a regular newline.
10815 if (next_content[0] == '#') {
10816 // Here we look for a "." or "&." following a "\n".
10817 const uint8_t *following = next_newline(next_content, parser->end - next_content);
10818
10819 while (following && (following + 1 < parser->end)) {
10820 following++;
10821 following += pm_strspn_inline_whitespace(following, parser->end - following);
10822
10823 // If this is not followed by a comment, then we can break out
10824 // of this loop.
10825 if (peek_at(parser, following) != '#') break;
10826
10827 // If there is a comment, then we need to find the end of the
10828 // comment and continue searching from there.
10829 following = next_newline(following, parser->end - following);
10830 }
10831
10832 // If the lex state was ignored, or we hit a '.' or a '&.',
10833 // we will lex the ignored newline
10834 if (
10835 lex_state_ignored_p(parser) ||
10836 (following && (
10837 (peek_at(parser, following) == '.') ||
10838 (peek_at(parser, following) == '&' && peek_at(parser, following + 1) == '.')
10839 ))
10840 ) {
10841 if (!lexed_comment) parser_lex_ignored_newline(parser);
10842 lexed_comment = false;
10843 goto lex_next_token;
10844 }
10845 }
10846
10847 // If we hit a . after a newline, then we're in a call chain and
10848 // we need to return the call operator.
10849 if (next_content[0] == '.') {
10850 // To match ripper, we need to emit an ignored newline even though
10851 // it's a real newline in the case that we have a beginless range
10852 // on a subsequent line.
10853 if (peek_at(parser, next_content + 1) == '.') {
10854 if (!lexed_comment) parser_lex_ignored_newline(parser);
10855 lex_state_set(parser, PM_LEX_STATE_BEG);
10856 parser->command_start = true;
10857 parser->current.type = PM_TOKEN_NEWLINE;
10858 return;
10859 }
10860
10861 if (!lexed_comment) parser_lex_ignored_newline(parser);
10862 lex_state_set(parser, PM_LEX_STATE_DOT);
10863 parser->current.start = next_content;
10864 parser->current.end = next_content + 1;
10865 parser->next_start = NULL;
10866 LEX(PM_TOKEN_DOT);
10867 }
10868
10869 // If we hit a &. after a newline, then we're in a call chain and
10870 // we need to return the call operator.
10871 if (peek_at(parser, next_content) == '&' && peek_at(parser, next_content + 1) == '.') {
10872 if (!lexed_comment) parser_lex_ignored_newline(parser);
10873 lex_state_set(parser, PM_LEX_STATE_DOT);
10874 parser->current.start = next_content;
10875 parser->current.end = next_content + 2;
10876 parser->next_start = NULL;
10878 }
10879 }
10880
10881 // At this point we know this is a regular newline, and we can set the
10882 // necessary state and return the token.
10883 lex_state_set(parser, PM_LEX_STATE_BEG);
10884 parser->command_start = true;
10885 parser->current.type = PM_TOKEN_NEWLINE;
10886 if (!lexed_comment) parser_lex_callback(parser);
10887 return;
10888 }
10889
10890 // ,
10891 case ',':
10892 if ((parser->previous.type == PM_TOKEN_COMMA) && (parser->enclosure_nesting > 0)) {
10893 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_ARRAY_TERM, pm_token_type_human(parser->current.type));
10894 }
10895
10896 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
10897 LEX(PM_TOKEN_COMMA);
10898
10899 // (
10900 case '(': {
10902
10903 if (space_seen && (lex_state_arg_p(parser) || parser->lex_state == (PM_LEX_STATE_END | PM_LEX_STATE_LABEL))) {
10905 }
10906
10907 parser->enclosure_nesting++;
10908 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
10909 pm_do_loop_stack_push(parser, false);
10910 LEX(type);
10911 }
10912
10913 // )
10914 case ')':
10915 parser->enclosure_nesting--;
10916 lex_state_set(parser, PM_LEX_STATE_ENDFN);
10917 pm_do_loop_stack_pop(parser);
10919
10920 // ;
10921 case ';':
10922 lex_state_set(parser, PM_LEX_STATE_BEG);
10923 parser->command_start = true;
10924 LEX(PM_TOKEN_SEMICOLON);
10925
10926 // [ [] []=
10927 case '[':
10928 parser->enclosure_nesting++;
10930
10931 if (lex_state_operator_p(parser)) {
10932 if (match(parser, ']')) {
10933 parser->enclosure_nesting--;
10934 lex_state_set(parser, PM_LEX_STATE_ARG);
10936 }
10937
10938 lex_state_set(parser, PM_LEX_STATE_ARG | PM_LEX_STATE_LABEL);
10939 LEX(type);
10940 }
10941
10942 if (lex_state_beg_p(parser) || (lex_state_arg_p(parser) && (space_seen || lex_state_p(parser, PM_LEX_STATE_LABELED)))) {
10944 }
10945
10946 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
10947 pm_do_loop_stack_push(parser, false);
10948 LEX(type);
10949
10950 // ]
10951 case ']':
10952 parser->enclosure_nesting--;
10953 lex_state_set(parser, PM_LEX_STATE_END);
10954 pm_do_loop_stack_pop(parser);
10956
10957 // {
10958 case '{': {
10960
10961 if (parser->enclosure_nesting == parser->lambda_enclosure_nesting) {
10962 // This { begins a lambda
10963 parser->command_start = true;
10964 lex_state_set(parser, PM_LEX_STATE_BEG);
10966 } else if (lex_state_p(parser, PM_LEX_STATE_LABELED)) {
10967 // This { begins a hash literal
10968 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
10969 } else if (lex_state_p(parser, PM_LEX_STATE_ARG_ANY | PM_LEX_STATE_END | PM_LEX_STATE_ENDFN)) {
10970 // This { begins a block
10971 parser->command_start = true;
10972 lex_state_set(parser, PM_LEX_STATE_BEG);
10973 } else if (lex_state_p(parser, PM_LEX_STATE_ENDARG)) {
10974 // This { begins a block on a command
10975 parser->command_start = true;
10976 lex_state_set(parser, PM_LEX_STATE_BEG);
10977 } else {
10978 // This { begins a hash literal
10979 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
10980 }
10981
10982 parser->enclosure_nesting++;
10983 parser->brace_nesting++;
10984 pm_do_loop_stack_push(parser, false);
10985
10986 LEX(type);
10987 }
10988
10989 // }
10990 case '}':
10991 parser->enclosure_nesting--;
10992 pm_do_loop_stack_pop(parser);
10993
10994 if ((parser->lex_modes.current->mode == PM_LEX_EMBEXPR) && (parser->brace_nesting == 0)) {
10995 lex_mode_pop(parser);
10997 }
10998
10999 parser->brace_nesting--;
11000 lex_state_set(parser, PM_LEX_STATE_END);
11002
11003 // * ** **= *=
11004 case '*': {
11005 if (match(parser, '*')) {
11006 if (match(parser, '=')) {
11007 lex_state_set(parser, PM_LEX_STATE_BEG);
11009 }
11010
11012
11013 if (lex_state_spcarg_p(parser, space_seen)) {
11014 pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_PREFIX_STAR_STAR);
11016 } else if (lex_state_beg_p(parser)) {
11018 } else if (ambiguous_operator_p(parser, space_seen)) {
11019 PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "**", "argument prefix");
11020 }
11021
11022 if (lex_state_operator_p(parser)) {
11023 lex_state_set(parser, PM_LEX_STATE_ARG);
11024 } else {
11025 lex_state_set(parser, PM_LEX_STATE_BEG);
11026 }
11027
11028 LEX(type);
11029 }
11030
11031 if (match(parser, '=')) {
11032 lex_state_set(parser, PM_LEX_STATE_BEG);
11034 }
11035
11037
11038 if (lex_state_spcarg_p(parser, space_seen)) {
11039 pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_PREFIX_STAR);
11041 } else if (lex_state_beg_p(parser)) {
11043 } else if (ambiguous_operator_p(parser, space_seen)) {
11044 PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "*", "argument prefix");
11045 }
11046
11047 if (lex_state_operator_p(parser)) {
11048 lex_state_set(parser, PM_LEX_STATE_ARG);
11049 } else {
11050 lex_state_set(parser, PM_LEX_STATE_BEG);
11051 }
11052
11053 LEX(type);
11054 }
11055
11056 // ! != !~ !@
11057 case '!':
11058 if (lex_state_operator_p(parser)) {
11059 lex_state_set(parser, PM_LEX_STATE_ARG);
11060 if (match(parser, '@')) {
11061 LEX(PM_TOKEN_BANG);
11062 }
11063 } else {
11064 lex_state_set(parser, PM_LEX_STATE_BEG);
11065 }
11066
11067 if (match(parser, '=')) {
11069 }
11070
11071 if (match(parser, '~')) {
11073 }
11074
11075 LEX(PM_TOKEN_BANG);
11076
11077 // = => =~ == === =begin
11078 case '=':
11079 if (
11080 current_token_starts_line(parser) &&
11081 (parser->current.end + 5 <= parser->end) &&
11082 memcmp(parser->current.end, "begin", 5) == 0 &&
11083 (pm_char_is_whitespace(peek_offset(parser, 5)) || (peek_offset(parser, 5) == '\0'))
11084 ) {
11085 pm_token_type_t type = lex_embdoc(parser);
11086 if (type == PM_TOKEN_EOF) {
11087 LEX(type);
11088 }
11089
11090 goto lex_next_token;
11091 }
11092
11093 if (lex_state_operator_p(parser)) {
11094 lex_state_set(parser, PM_LEX_STATE_ARG);
11095 } else {
11096 lex_state_set(parser, PM_LEX_STATE_BEG);
11097 }
11098
11099 if (match(parser, '>')) {
11101 }
11102
11103 if (match(parser, '~')) {
11105 }
11106
11107 if (match(parser, '=')) {
11108 LEX(match(parser, '=') ? PM_TOKEN_EQUAL_EQUAL_EQUAL : PM_TOKEN_EQUAL_EQUAL);
11109 }
11110
11111 LEX(PM_TOKEN_EQUAL);
11112
11113 // < << <<= <= <=>
11114 case '<':
11115 if (match(parser, '<')) {
11116 if (
11117 !lex_state_p(parser, PM_LEX_STATE_DOT | PM_LEX_STATE_CLASS) &&
11118 !lex_state_end_p(parser) &&
11119 (!lex_state_p(parser, PM_LEX_STATE_ARG_ANY) || lex_state_p(parser, PM_LEX_STATE_LABELED) || space_seen)
11120 ) {
11121 const uint8_t *end = parser->current.end;
11122
11123 pm_heredoc_quote_t quote = PM_HEREDOC_QUOTE_NONE;
11124 pm_heredoc_indent_t indent = PM_HEREDOC_INDENT_NONE;
11125
11126 if (match(parser, '-')) {
11127 indent = PM_HEREDOC_INDENT_DASH;
11128 }
11129 else if (match(parser, '~')) {
11130 indent = PM_HEREDOC_INDENT_TILDE;
11131 }
11132
11133 if (match(parser, '`')) {
11134 quote = PM_HEREDOC_QUOTE_BACKTICK;
11135 }
11136 else if (match(parser, '"')) {
11137 quote = PM_HEREDOC_QUOTE_DOUBLE;
11138 }
11139 else if (match(parser, '\'')) {
11140 quote = PM_HEREDOC_QUOTE_SINGLE;
11141 }
11142
11143 const uint8_t *ident_start = parser->current.end;
11144 size_t width = 0;
11145
11146 if (parser->current.end >= parser->end) {
11147 parser->current.end = end;
11148 } else if (quote == PM_HEREDOC_QUOTE_NONE && (width = char_is_identifier(parser, parser->current.end)) == 0) {
11149 parser->current.end = end;
11150 } else {
11151 if (quote == PM_HEREDOC_QUOTE_NONE) {
11152 parser->current.end += width;
11153
11154 while ((parser->current.end < parser->end) && (width = char_is_identifier(parser, parser->current.end))) {
11155 parser->current.end += width;
11156 }
11157 } else {
11158 // If we have quotes, then we're going to go until we find the
11159 // end quote.
11160 while ((parser->current.end < parser->end) && quote != (pm_heredoc_quote_t) (*parser->current.end)) {
11161 if (*parser->current.end == '\r' || *parser->current.end == '\n') break;
11162 parser->current.end++;
11163 }
11164 }
11165
11166 size_t ident_length = (size_t) (parser->current.end - ident_start);
11167 bool ident_error = false;
11168
11169 if (quote != PM_HEREDOC_QUOTE_NONE && !match(parser, (uint8_t) quote)) {
11170 pm_parser_err(parser, ident_start, ident_start + ident_length, PM_ERR_HEREDOC_IDENTIFIER);
11171 ident_error = true;
11172 }
11173
11174 parser->explicit_encoding = NULL;
11175 lex_mode_push(parser, (pm_lex_mode_t) {
11176 .mode = PM_LEX_HEREDOC,
11177 .as.heredoc = {
11178 .base = {
11179 .ident_start = ident_start,
11180 .ident_length = ident_length,
11181 .quote = quote,
11182 .indent = indent
11183 },
11184 .next_start = parser->current.end,
11185 .common_whitespace = NULL,
11186 .line_continuation = false
11187 }
11188 });
11189
11190 if (parser->heredoc_end == NULL) {
11191 const uint8_t *body_start = next_newline(parser->current.end, parser->end - parser->current.end);
11192
11193 if (body_start == NULL) {
11194 // If there is no newline after the heredoc identifier, then
11195 // this is not a valid heredoc declaration. In this case we
11196 // will add an error, but we will still return a heredoc
11197 // start.
11198 if (!ident_error) pm_parser_err_heredoc_term(parser, ident_start, ident_length);
11199 body_start = parser->end;
11200 } else {
11201 // Otherwise, we want to indicate that the body of the
11202 // heredoc starts on the character after the next newline.
11203 pm_newline_list_append(&parser->newline_list, body_start);
11204 body_start++;
11205 }
11206
11207 parser->next_start = body_start;
11208 } else {
11209 parser->next_start = parser->heredoc_end;
11210 }
11211
11213 }
11214 }
11215
11216 if (match(parser, '=')) {
11217 lex_state_set(parser, PM_LEX_STATE_BEG);
11219 }
11220
11221 if (ambiguous_operator_p(parser, space_seen)) {
11222 PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "<<", "here document");
11223 }
11224
11225 if (lex_state_operator_p(parser)) {
11226 lex_state_set(parser, PM_LEX_STATE_ARG);
11227 } else {
11228 if (lex_state_p(parser, PM_LEX_STATE_CLASS)) parser->command_start = true;
11229 lex_state_set(parser, PM_LEX_STATE_BEG);
11230 }
11231
11232 LEX(PM_TOKEN_LESS_LESS);
11233 }
11234
11235 if (lex_state_operator_p(parser)) {
11236 lex_state_set(parser, PM_LEX_STATE_ARG);
11237 } else {
11238 if (lex_state_p(parser, PM_LEX_STATE_CLASS)) parser->command_start = true;
11239 lex_state_set(parser, PM_LEX_STATE_BEG);
11240 }
11241
11242 if (match(parser, '=')) {
11243 if (match(parser, '>')) {
11245 }
11246
11248 }
11249
11250 LEX(PM_TOKEN_LESS);
11251
11252 // > >> >>= >=
11253 case '>':
11254 if (match(parser, '>')) {
11255 if (lex_state_operator_p(parser)) {
11256 lex_state_set(parser, PM_LEX_STATE_ARG);
11257 } else {
11258 lex_state_set(parser, PM_LEX_STATE_BEG);
11259 }
11260 LEX(match(parser, '=') ? PM_TOKEN_GREATER_GREATER_EQUAL : PM_TOKEN_GREATER_GREATER);
11261 }
11262
11263 if (lex_state_operator_p(parser)) {
11264 lex_state_set(parser, PM_LEX_STATE_ARG);
11265 } else {
11266 lex_state_set(parser, PM_LEX_STATE_BEG);
11267 }
11268
11269 LEX(match(parser, '=') ? PM_TOKEN_GREATER_EQUAL : PM_TOKEN_GREATER);
11270
11271 // double-quoted string literal
11272 case '"': {
11273 bool label_allowed = (lex_state_p(parser, PM_LEX_STATE_LABEL | PM_LEX_STATE_ENDFN) && !previous_command_start) || lex_state_arg_p(parser);
11274 lex_mode_push_string(parser, true, label_allowed, '\0', '"');
11276 }
11277
11278 // xstring literal
11279 case '`': {
11280 if (lex_state_p(parser, PM_LEX_STATE_FNAME)) {
11281 lex_state_set(parser, PM_LEX_STATE_ENDFN);
11282 LEX(PM_TOKEN_BACKTICK);
11283 }
11284
11285 if (lex_state_p(parser, PM_LEX_STATE_DOT)) {
11286 if (previous_command_start) {
11287 lex_state_set(parser, PM_LEX_STATE_CMDARG);
11288 } else {
11289 lex_state_set(parser, PM_LEX_STATE_ARG);
11290 }
11291
11292 LEX(PM_TOKEN_BACKTICK);
11293 }
11294
11295 lex_mode_push_string(parser, true, false, '\0', '`');
11296 LEX(PM_TOKEN_BACKTICK);
11297 }
11298
11299 // single-quoted string literal
11300 case '\'': {
11301 bool label_allowed = (lex_state_p(parser, PM_LEX_STATE_LABEL | PM_LEX_STATE_ENDFN) && !previous_command_start) || lex_state_arg_p(parser);
11302 lex_mode_push_string(parser, false, label_allowed, '\0', '\'');
11304 }
11305
11306 // ? character literal
11307 case '?':
11308 LEX(lex_question_mark(parser));
11309
11310 // & && &&= &=
11311 case '&': {
11312 if (match(parser, '&')) {
11313 lex_state_set(parser, PM_LEX_STATE_BEG);
11314
11315 if (match(parser, '=')) {
11317 }
11318
11320 }
11321
11322 if (match(parser, '=')) {
11323 lex_state_set(parser, PM_LEX_STATE_BEG);
11325 }
11326
11327 if (match(parser, '.')) {
11328 lex_state_set(parser, PM_LEX_STATE_DOT);
11330 }
11331
11333 if (lex_state_spcarg_p(parser, space_seen)) {
11334 if ((peek(parser) != ':') || (peek_offset(parser, 1) == '\0')) {
11335 pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_PREFIX_AMPERSAND);
11336 } else {
11337 const uint8_t delim = peek_offset(parser, 1);
11338
11339 if ((delim != '\'') && (delim != '"') && !char_is_identifier(parser, parser->current.end + 1)) {
11340 pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_PREFIX_AMPERSAND);
11341 }
11342 }
11343
11345 } else if (lex_state_beg_p(parser)) {
11347 } else if (ambiguous_operator_p(parser, space_seen)) {
11348 PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "&", "argument prefix");
11349 }
11350
11351 if (lex_state_operator_p(parser)) {
11352 lex_state_set(parser, PM_LEX_STATE_ARG);
11353 } else {
11354 lex_state_set(parser, PM_LEX_STATE_BEG);
11355 }
11356
11357 LEX(type);
11358 }
11359
11360 // | || ||= |=
11361 case '|':
11362 if (match(parser, '|')) {
11363 if (match(parser, '=')) {
11364 lex_state_set(parser, PM_LEX_STATE_BEG);
11366 }
11367
11368 if (lex_state_p(parser, PM_LEX_STATE_BEG)) {
11369 parser->current.end--;
11370 LEX(PM_TOKEN_PIPE);
11371 }
11372
11373 lex_state_set(parser, PM_LEX_STATE_BEG);
11374 LEX(PM_TOKEN_PIPE_PIPE);
11375 }
11376
11377 if (match(parser, '=')) {
11378 lex_state_set(parser, PM_LEX_STATE_BEG);
11380 }
11381
11382 if (lex_state_operator_p(parser)) {
11383 lex_state_set(parser, PM_LEX_STATE_ARG);
11384 } else {
11385 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
11386 }
11387
11388 LEX(PM_TOKEN_PIPE);
11389
11390 // + += +@
11391 case '+': {
11392 if (lex_state_operator_p(parser)) {
11393 lex_state_set(parser, PM_LEX_STATE_ARG);
11394
11395 if (match(parser, '@')) {
11396 LEX(PM_TOKEN_UPLUS);
11397 }
11398
11399 LEX(PM_TOKEN_PLUS);
11400 }
11401
11402 if (match(parser, '=')) {
11403 lex_state_set(parser, PM_LEX_STATE_BEG);
11405 }
11406
11407 if (
11408 lex_state_beg_p(parser) ||
11409 (lex_state_spcarg_p(parser, space_seen) ? (pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_FIRST_ARGUMENT_PLUS), true) : false)
11410 ) {
11411 lex_state_set(parser, PM_LEX_STATE_BEG);
11412
11413 if (pm_char_is_decimal_digit(peek(parser))) {
11414 parser->current.end++;
11415 pm_token_type_t type = lex_numeric(parser);
11416 lex_state_set(parser, PM_LEX_STATE_END);
11417 LEX(type);
11418 }
11419
11420 LEX(PM_TOKEN_UPLUS);
11421 }
11422
11423 if (ambiguous_operator_p(parser, space_seen)) {
11424 PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "+", "unary operator");
11425 }
11426
11427 lex_state_set(parser, PM_LEX_STATE_BEG);
11428 LEX(PM_TOKEN_PLUS);
11429 }
11430
11431 // - -= -@
11432 case '-': {
11433 if (lex_state_operator_p(parser)) {
11434 lex_state_set(parser, PM_LEX_STATE_ARG);
11435
11436 if (match(parser, '@')) {
11437 LEX(PM_TOKEN_UMINUS);
11438 }
11439
11440 LEX(PM_TOKEN_MINUS);
11441 }
11442
11443 if (match(parser, '=')) {
11444 lex_state_set(parser, PM_LEX_STATE_BEG);
11446 }
11447
11448 if (match(parser, '>')) {
11449 lex_state_set(parser, PM_LEX_STATE_ENDFN);
11451 }
11452
11453 bool spcarg = lex_state_spcarg_p(parser, space_seen);
11454 bool is_beg = lex_state_beg_p(parser);
11455 if (!is_beg && spcarg) {
11456 pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_FIRST_ARGUMENT_MINUS);
11457 }
11458
11459 if (is_beg || spcarg) {
11460 lex_state_set(parser, PM_LEX_STATE_BEG);
11461 LEX(pm_char_is_decimal_digit(peek(parser)) ? PM_TOKEN_UMINUS_NUM : PM_TOKEN_UMINUS);
11462 }
11463
11464 if (ambiguous_operator_p(parser, space_seen)) {
11465 PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "-", "unary operator");
11466 }
11467
11468 lex_state_set(parser, PM_LEX_STATE_BEG);
11469 LEX(PM_TOKEN_MINUS);
11470 }
11471
11472 // . .. ...
11473 case '.': {
11474 bool beg_p = lex_state_beg_p(parser);
11475
11476 if (match(parser, '.')) {
11477 if (match(parser, '.')) {
11478 // If we're _not_ inside a range within default parameters
11479 if (!context_p(parser, PM_CONTEXT_DEFAULT_PARAMS) && context_p(parser, PM_CONTEXT_DEF_PARAMS)) {
11480 if (lex_state_p(parser, PM_LEX_STATE_END)) {
11481 lex_state_set(parser, PM_LEX_STATE_BEG);
11482 } else {
11483 lex_state_set(parser, PM_LEX_STATE_ENDARG);
11484 }
11486 }
11487
11488 if (parser->enclosure_nesting == 0 && parser_end_of_line_p(parser)) {
11489 pm_parser_warn_token(parser, &parser->current, PM_WARN_DOT_DOT_DOT_EOL);
11490 }
11491
11492 lex_state_set(parser, PM_LEX_STATE_BEG);
11494 }
11495
11496 lex_state_set(parser, PM_LEX_STATE_BEG);
11497 LEX(beg_p ? PM_TOKEN_UDOT_DOT : PM_TOKEN_DOT_DOT);
11498 }
11499
11500 lex_state_set(parser, PM_LEX_STATE_DOT);
11501 LEX(PM_TOKEN_DOT);
11502 }
11503
11504 // integer
11505 case '0':
11506 case '1':
11507 case '2':
11508 case '3':
11509 case '4':
11510 case '5':
11511 case '6':
11512 case '7':
11513 case '8':
11514 case '9': {
11515 pm_token_type_t type = lex_numeric(parser);
11516 lex_state_set(parser, PM_LEX_STATE_END);
11517 LEX(type);
11518 }
11519
11520 // :: symbol
11521 case ':':
11522 if (match(parser, ':')) {
11523 if (lex_state_beg_p(parser) || lex_state_p(parser, PM_LEX_STATE_CLASS) || (lex_state_p(parser, PM_LEX_STATE_ARG_ANY) && space_seen)) {
11524 lex_state_set(parser, PM_LEX_STATE_BEG);
11526 }
11527
11528 lex_state_set(parser, PM_LEX_STATE_DOT);
11530 }
11531
11532 if (lex_state_end_p(parser) || pm_char_is_whitespace(peek(parser)) || peek(parser) == '#') {
11533 lex_state_set(parser, PM_LEX_STATE_BEG);
11534 LEX(PM_TOKEN_COLON);
11535 }
11536
11537 if (peek(parser) == '"' || peek(parser) == '\'') {
11538 lex_mode_push_string(parser, peek(parser) == '"', false, '\0', *parser->current.end);
11539 parser->current.end++;
11540 }
11541
11542 lex_state_set(parser, PM_LEX_STATE_FNAME);
11544
11545 // / /=
11546 case '/':
11547 if (lex_state_beg_p(parser)) {
11548 lex_mode_push_regexp(parser, '\0', '/');
11550 }
11551
11552 if (match(parser, '=')) {
11553 lex_state_set(parser, PM_LEX_STATE_BEG);
11555 }
11556
11557 if (lex_state_spcarg_p(parser, space_seen)) {
11558 pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_SLASH);
11559 lex_mode_push_regexp(parser, '\0', '/');
11561 }
11562
11563 if (ambiguous_operator_p(parser, space_seen)) {
11564 PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "/", "regexp literal");
11565 }
11566
11567 if (lex_state_operator_p(parser)) {
11568 lex_state_set(parser, PM_LEX_STATE_ARG);
11569 } else {
11570 lex_state_set(parser, PM_LEX_STATE_BEG);
11571 }
11572
11573 LEX(PM_TOKEN_SLASH);
11574
11575 // ^ ^=
11576 case '^':
11577 if (lex_state_operator_p(parser)) {
11578 lex_state_set(parser, PM_LEX_STATE_ARG);
11579 } else {
11580 lex_state_set(parser, PM_LEX_STATE_BEG);
11581 }
11582 LEX(match(parser, '=') ? PM_TOKEN_CARET_EQUAL : PM_TOKEN_CARET);
11583
11584 // ~ ~@
11585 case '~':
11586 if (lex_state_operator_p(parser)) {
11587 (void) match(parser, '@');
11588 lex_state_set(parser, PM_LEX_STATE_ARG);
11589 } else {
11590 lex_state_set(parser, PM_LEX_STATE_BEG);
11591 }
11592
11593 LEX(PM_TOKEN_TILDE);
11594
11595 // % %= %i %I %q %Q %w %W
11596 case '%': {
11597 // If there is no subsequent character then we have an
11598 // invalid token. We're going to say it's the percent
11599 // operator because we don't want to move into the string
11600 // lex mode unnecessarily.
11601 if ((lex_state_beg_p(parser) || lex_state_arg_p(parser)) && (parser->current.end >= parser->end)) {
11602 pm_parser_err_current(parser, PM_ERR_INVALID_PERCENT_EOF);
11603 LEX(PM_TOKEN_PERCENT);
11604 }
11605
11606 if (!lex_state_beg_p(parser) && match(parser, '=')) {
11607 lex_state_set(parser, PM_LEX_STATE_BEG);
11609 } else if (
11610 lex_state_beg_p(parser) ||
11611 (lex_state_p(parser, PM_LEX_STATE_FITEM) && (peek(parser) == 's')) ||
11612 lex_state_spcarg_p(parser, space_seen)
11613 ) {
11614 if (!parser->encoding->alnum_char(parser->current.end, parser->end - parser->current.end)) {
11615 if (*parser->current.end >= 0x80) {
11616 pm_parser_err_current(parser, PM_ERR_INVALID_PERCENT);
11617 }
11618
11619 const uint8_t delimiter = pm_lex_percent_delimiter(parser);
11620 lex_mode_push_string(parser, true, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
11622 }
11623
11624 // Delimiters for %-literals cannot be alphanumeric. We
11625 // validate that here.
11626 uint8_t delimiter = peek_offset(parser, 1);
11627 if (delimiter >= 0x80 || parser->encoding->alnum_char(&delimiter, 1)) {
11628 pm_parser_err_current(parser, PM_ERR_INVALID_PERCENT);
11629 goto lex_next_token;
11630 }
11631
11632 switch (peek(parser)) {
11633 case 'i': {
11634 parser->current.end++;
11635
11636 if (parser->current.end < parser->end) {
11637 lex_mode_push_list(parser, false, pm_lex_percent_delimiter(parser));
11638 } else {
11639 lex_mode_push_list_eof(parser);
11640 }
11641
11643 }
11644 case 'I': {
11645 parser->current.end++;
11646
11647 if (parser->current.end < parser->end) {
11648 lex_mode_push_list(parser, true, pm_lex_percent_delimiter(parser));
11649 } else {
11650 lex_mode_push_list_eof(parser);
11651 }
11652
11654 }
11655 case 'r': {
11656 parser->current.end++;
11657
11658 if (parser->current.end < parser->end) {
11659 const uint8_t delimiter = pm_lex_percent_delimiter(parser);
11660 lex_mode_push_regexp(parser, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
11661 } else {
11662 lex_mode_push_regexp(parser, '\0', '\0');
11663 }
11664
11666 }
11667 case 'q': {
11668 parser->current.end++;
11669
11670 if (parser->current.end < parser->end) {
11671 const uint8_t delimiter = pm_lex_percent_delimiter(parser);
11672 lex_mode_push_string(parser, false, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
11673 } else {
11674 lex_mode_push_string_eof(parser);
11675 }
11676
11678 }
11679 case 'Q': {
11680 parser->current.end++;
11681
11682 if (parser->current.end < parser->end) {
11683 const uint8_t delimiter = pm_lex_percent_delimiter(parser);
11684 lex_mode_push_string(parser, true, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
11685 } else {
11686 lex_mode_push_string_eof(parser);
11687 }
11688
11690 }
11691 case 's': {
11692 parser->current.end++;
11693
11694 if (parser->current.end < parser->end) {
11695 const uint8_t delimiter = pm_lex_percent_delimiter(parser);
11696 lex_mode_push_string(parser, false, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
11697 lex_state_set(parser, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM);
11698 } else {
11699 lex_mode_push_string_eof(parser);
11700 }
11701
11703 }
11704 case 'w': {
11705 parser->current.end++;
11706
11707 if (parser->current.end < parser->end) {
11708 lex_mode_push_list(parser, false, pm_lex_percent_delimiter(parser));
11709 } else {
11710 lex_mode_push_list_eof(parser);
11711 }
11712
11714 }
11715 case 'W': {
11716 parser->current.end++;
11717
11718 if (parser->current.end < parser->end) {
11719 lex_mode_push_list(parser, true, pm_lex_percent_delimiter(parser));
11720 } else {
11721 lex_mode_push_list_eof(parser);
11722 }
11723
11725 }
11726 case 'x': {
11727 parser->current.end++;
11728
11729 if (parser->current.end < parser->end) {
11730 const uint8_t delimiter = pm_lex_percent_delimiter(parser);
11731 lex_mode_push_string(parser, true, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
11732 } else {
11733 lex_mode_push_string_eof(parser);
11734 }
11735
11737 }
11738 default:
11739 // If we get to this point, then we have a % that is completely
11740 // unparsable. In this case we'll just drop it from the parser
11741 // and skip past it and hope that the next token is something
11742 // that we can parse.
11743 pm_parser_err_current(parser, PM_ERR_INVALID_PERCENT);
11744 goto lex_next_token;
11745 }
11746 }
11747
11748 if (ambiguous_operator_p(parser, space_seen)) {
11749 PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "%", "string literal");
11750 }
11751
11752 lex_state_set(parser, lex_state_operator_p(parser) ? PM_LEX_STATE_ARG : PM_LEX_STATE_BEG);
11753 LEX(PM_TOKEN_PERCENT);
11754 }
11755
11756 // global variable
11757 case '$': {
11758 pm_token_type_t type = lex_global_variable(parser);
11759
11760 // If we're lexing an embedded variable, then we need to pop back into
11761 // the parent lex context.
11762 if (parser->lex_modes.current->mode == PM_LEX_EMBVAR) {
11763 lex_mode_pop(parser);
11764 }
11765
11766 lex_state_set(parser, PM_LEX_STATE_END);
11767 LEX(type);
11768 }
11769
11770 // instance variable, class variable
11771 case '@':
11772 lex_state_set(parser, parser->lex_state & PM_LEX_STATE_FNAME ? PM_LEX_STATE_ENDFN : PM_LEX_STATE_END);
11773 LEX(lex_at_variable(parser));
11774
11775 default: {
11776 if (*parser->current.start != '_') {
11777 size_t width = char_is_identifier_start(parser, parser->current.start);
11778
11779 // If this isn't the beginning of an identifier, then
11780 // it's an invalid token as we've exhausted all of the
11781 // other options. We'll skip past it and return the next
11782 // token after adding an appropriate error message.
11783 if (!width) {
11784 if (*parser->current.start >= 0x80) {
11785 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_MULTIBYTE_CHARACTER, *parser->current.start);
11786 } else if (*parser->current.start == '\\') {
11787 switch (peek_at(parser, parser->current.start + 1)) {
11788 case ' ':
11789 parser->current.end++;
11790 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped space");
11791 break;
11792 case '\f':
11793 parser->current.end++;
11794 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped form feed");
11795 break;
11796 case '\t':
11797 parser->current.end++;
11798 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped horizontal tab");
11799 break;
11800 case '\v':
11801 parser->current.end++;
11802 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped vertical tab");
11803 break;
11804 case '\r':
11805 if (peek_at(parser, parser->current.start + 2) != '\n') {
11806 parser->current.end++;
11807 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped carriage return");
11808 break;
11809 }
11810 /* fallthrough */
11811 default:
11812 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "backslash");
11813 break;
11814 }
11815 } else if (char_is_ascii_printable(*parser->current.start)) {
11816 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_PRINTABLE_CHARACTER, *parser->current.start);
11817 } else {
11818 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_CHARACTER, *parser->current.start);
11819 }
11820
11821 goto lex_next_token;
11822 }
11823
11824 parser->current.end = parser->current.start + width;
11825 }
11826
11827 pm_token_type_t type = lex_identifier(parser, previous_command_start);
11828
11829 // If we've hit a __END__ and it was at the start of the
11830 // line or the start of the file and it is followed by
11831 // either a \n or a \r\n, then this is the last token of the
11832 // file.
11833 if (
11834 ((parser->current.end - parser->current.start) == 7) &&
11835 current_token_starts_line(parser) &&
11836 (memcmp(parser->current.start, "__END__", 7) == 0) &&
11837 (parser->current.end == parser->end || match_eol(parser))
11838 ) {
11839 // Since we know we're about to add an __END__ comment,
11840 // we know we need to add all of the newlines to get the
11841 // correct column information for it.
11842 const uint8_t *cursor = parser->current.end;
11843 while ((cursor = next_newline(cursor, parser->end - cursor)) != NULL) {
11844 pm_newline_list_append(&parser->newline_list, cursor++);
11845 }
11846
11847 parser->current.end = parser->end;
11848 parser->current.type = PM_TOKEN___END__;
11849 parser_lex_callback(parser);
11850
11851 parser->data_loc.start = parser->current.start;
11852 parser->data_loc.end = parser->current.end;
11853
11854 LEX(PM_TOKEN_EOF);
11855 }
11856
11857 pm_lex_state_t last_state = parser->lex_state;
11858
11860 if (lex_state_p(parser, PM_LEX_STATE_BEG_ANY | PM_LEX_STATE_ARG_ANY | PM_LEX_STATE_DOT)) {
11861 if (previous_command_start) {
11862 lex_state_set(parser, PM_LEX_STATE_CMDARG);
11863 } else {
11864 lex_state_set(parser, PM_LEX_STATE_ARG);
11865 }
11866 } else if (parser->lex_state == PM_LEX_STATE_FNAME) {
11867 lex_state_set(parser, PM_LEX_STATE_ENDFN);
11868 } else {
11869 lex_state_set(parser, PM_LEX_STATE_END);
11870 }
11871 }
11872
11873 if (
11874 !(last_state & (PM_LEX_STATE_DOT | PM_LEX_STATE_FNAME)) &&
11876 ((pm_parser_local_depth(parser, &parser->current) != -1) ||
11877 pm_token_is_numbered_parameter(parser->current.start, parser->current.end))
11878 ) {
11879 lex_state_set(parser, PM_LEX_STATE_END | PM_LEX_STATE_LABEL);
11880 }
11881
11882 LEX(type);
11883 }
11884 }
11885 }
11886 case PM_LEX_LIST: {
11887 if (parser->next_start != NULL) {
11888 parser->current.end = parser->next_start;
11889 parser->next_start = NULL;
11890 }
11891
11892 // First we'll set the beginning of the token.
11893 parser->current.start = parser->current.end;
11894
11895 // If there's any whitespace at the start of the list, then we're
11896 // going to trim it off the beginning and create a new token.
11897 size_t whitespace;
11898
11899 if (parser->heredoc_end) {
11900 whitespace = pm_strspn_inline_whitespace(parser->current.end, parser->end - parser->current.end);
11901 if (peek_offset(parser, (ptrdiff_t)whitespace) == '\n') {
11902 whitespace += 1;
11903 }
11904 } else {
11905 whitespace = pm_strspn_whitespace_newlines(parser->current.end, parser->end - parser->current.end, &parser->newline_list);
11906 }
11907
11908 if (whitespace > 0) {
11909 parser->current.end += whitespace;
11910 if (peek_offset(parser, -1) == '\n') {
11911 // mutates next_start
11912 parser_flush_heredoc_end(parser);
11913 }
11914 LEX(PM_TOKEN_WORDS_SEP);
11915 }
11916
11917 // We'll check if we're at the end of the file. If we are, then we
11918 // need to return the EOF token.
11919 if (parser->current.end >= parser->end) {
11920 LEX(PM_TOKEN_EOF);
11921 }
11922
11923 // Here we'll get a list of the places where strpbrk should break,
11924 // and then find the first one.
11925 pm_lex_mode_t *lex_mode = parser->lex_modes.current;
11926 const uint8_t *breakpoints = lex_mode->as.list.breakpoints;
11927 const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
11928
11929 // If we haven't found an escape yet, then this buffer will be
11930 // unallocated since we can refer directly to the source string.
11931 pm_token_buffer_t token_buffer = { 0 };
11932
11933 while (breakpoint != NULL) {
11934 // If we hit whitespace, then we must have received content by
11935 // now, so we can return an element of the list.
11936 if (pm_char_is_whitespace(*breakpoint)) {
11937 parser->current.end = breakpoint;
11938 pm_token_buffer_flush(parser, &token_buffer);
11940 }
11941
11942 // If we hit the terminator, we need to check which token to
11943 // return.
11944 if (*breakpoint == lex_mode->as.list.terminator) {
11945 // If this terminator doesn't actually close the list, then
11946 // we need to continue on past it.
11947 if (lex_mode->as.list.nesting > 0) {
11948 parser->current.end = breakpoint + 1;
11949 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
11950 lex_mode->as.list.nesting--;
11951 continue;
11952 }
11953
11954 // If we've hit the terminator and we've already skipped
11955 // past content, then we can return a list node.
11956 if (breakpoint > parser->current.start) {
11957 parser->current.end = breakpoint;
11958 pm_token_buffer_flush(parser, &token_buffer);
11960 }
11961
11962 // Otherwise, switch back to the default state and return
11963 // the end of the list.
11964 parser->current.end = breakpoint + 1;
11965 lex_mode_pop(parser);
11966 lex_state_set(parser, PM_LEX_STATE_END);
11968 }
11969
11970 // If we hit a null byte, skip directly past it.
11971 if (*breakpoint == '\0') {
11972 breakpoint = pm_strpbrk(parser, breakpoint + 1, breakpoints, parser->end - (breakpoint + 1), true);
11973 continue;
11974 }
11975
11976 // If we hit escapes, then we need to treat the next token
11977 // literally. In this case we'll skip past the next character
11978 // and find the next breakpoint.
11979 if (*breakpoint == '\\') {
11980 parser->current.end = breakpoint + 1;
11981
11982 // If we've hit the end of the file, then break out of the
11983 // loop by setting the breakpoint to NULL.
11984 if (parser->current.end == parser->end) {
11985 breakpoint = NULL;
11986 continue;
11987 }
11988
11989 pm_token_buffer_escape(parser, &token_buffer);
11990 uint8_t peeked = peek(parser);
11991
11992 switch (peeked) {
11993 case ' ':
11994 case '\f':
11995 case '\t':
11996 case '\v':
11997 case '\\':
11998 pm_token_buffer_push_byte(&token_buffer, peeked);
11999 parser->current.end++;
12000 break;
12001 case '\r':
12002 parser->current.end++;
12003 if (peek(parser) != '\n') {
12004 pm_token_buffer_push_byte(&token_buffer, '\r');
12005 break;
12006 }
12007 /* fallthrough */
12008 case '\n':
12009 pm_token_buffer_push_byte(&token_buffer, '\n');
12010
12011 if (parser->heredoc_end) {
12012 // ... if we are on the same line as a heredoc,
12013 // flush the heredoc and continue parsing after
12014 // heredoc_end.
12015 parser_flush_heredoc_end(parser);
12016 pm_token_buffer_copy(parser, &token_buffer);
12018 } else {
12019 // ... else track the newline.
12020 pm_newline_list_append(&parser->newline_list, parser->current.end);
12021 }
12022
12023 parser->current.end++;
12024 break;
12025 default:
12026 if (peeked == lex_mode->as.list.incrementor || peeked == lex_mode->as.list.terminator) {
12027 pm_token_buffer_push_byte(&token_buffer, peeked);
12028 parser->current.end++;
12029 } else if (lex_mode->as.list.interpolation) {
12030 escape_read(parser, &token_buffer.buffer, NULL, PM_ESCAPE_FLAG_NONE);
12031 } else {
12032 pm_token_buffer_push_byte(&token_buffer, '\\');
12033 pm_token_buffer_push_escaped(&token_buffer, parser);
12034 }
12035
12036 break;
12037 }
12038
12039 token_buffer.cursor = parser->current.end;
12040 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12041 continue;
12042 }
12043
12044 // If we hit a #, then we will attempt to lex interpolation.
12045 if (*breakpoint == '#') {
12046 pm_token_type_t type = lex_interpolation(parser, breakpoint);
12047
12048 if (type == PM_TOKEN_NOT_PROVIDED) {
12049 // If we haven't returned at this point then we had something
12050 // that looked like an interpolated class or instance variable
12051 // like "#@" but wasn't actually. In this case we'll just skip
12052 // to the next breakpoint.
12053 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12054 continue;
12055 }
12056
12058 pm_token_buffer_flush(parser, &token_buffer);
12059 }
12060
12061 LEX(type);
12062 }
12063
12064 // If we've hit the incrementor, then we need to skip past it
12065 // and find the next breakpoint.
12066 assert(*breakpoint == lex_mode->as.list.incrementor);
12067 parser->current.end = breakpoint + 1;
12068 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12069 lex_mode->as.list.nesting++;
12070 continue;
12071 }
12072
12073 if (parser->current.end > parser->current.start) {
12074 pm_token_buffer_flush(parser, &token_buffer);
12076 }
12077
12078 // If we were unable to find a breakpoint, then this token hits the
12079 // end of the file.
12080 parser->current.end = parser->end;
12081 pm_token_buffer_flush(parser, &token_buffer);
12083 }
12084 case PM_LEX_REGEXP: {
12085 // First, we'll set the start of this token to be the current end.
12086 if (parser->next_start == NULL) {
12087 parser->current.start = parser->current.end;
12088 } else {
12089 parser->current.start = parser->next_start;
12090 parser->current.end = parser->next_start;
12091 parser->next_start = NULL;
12092 }
12093
12094 // We'll check if we're at the end of the file. If we are, then we
12095 // need to return the EOF token.
12096 if (parser->current.end >= parser->end) {
12097 LEX(PM_TOKEN_EOF);
12098 }
12099
12100 // Get a reference to the current mode.
12101 pm_lex_mode_t *lex_mode = parser->lex_modes.current;
12102
12103 // These are the places where we need to split up the content of the
12104 // regular expression. We'll use strpbrk to find the first of these
12105 // characters.
12106 const uint8_t *breakpoints = lex_mode->as.regexp.breakpoints;
12107 const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
12108 pm_regexp_token_buffer_t token_buffer = { 0 };
12109
12110 while (breakpoint != NULL) {
12111 uint8_t term = lex_mode->as.regexp.terminator;
12112 bool is_terminator = (*breakpoint == term);
12113
12114 // If the terminator is a newline, we need to consider \r\n _also_ a newline.
12115 // For example: `%r\nfoo\r\n`
12116 // The regular expression should be /foo/, not /foo\r/
12117 if (*breakpoint == '\r' && peek_at(parser, breakpoint + 1) == '\n') {
12118 if (term == '\n') {
12119 is_terminator = true;
12120 }
12121
12122 // If the terminator is a CR, but we see a CRLF, we need to
12123 // treat the CRLF as a newline, meaning this is _not_ the
12124 // terminator
12125 if (term == '\r') {
12126 is_terminator = false;
12127 }
12128 }
12129
12130 // If we hit the terminator, we need to determine what kind of
12131 // token to return.
12132 if (is_terminator) {
12133 if (lex_mode->as.regexp.nesting > 0) {
12134 parser->current.end = breakpoint + 1;
12135 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
12136 lex_mode->as.regexp.nesting--;
12137 continue;
12138 }
12139
12140 // Here we've hit the terminator. If we have already consumed
12141 // content then we need to return that content as string content
12142 // first.
12143 if (breakpoint > parser->current.start) {
12144 parser->current.end = breakpoint;
12145 pm_regexp_token_buffer_flush(parser, &token_buffer);
12147 }
12148
12149 // Check here if we need to track the newline.
12150 size_t eol_length = match_eol_at(parser, breakpoint);
12151 if (eol_length) {
12152 parser->current.end = breakpoint + eol_length;
12153 pm_newline_list_append(&parser->newline_list, parser->current.end - 1);
12154 } else {
12155 parser->current.end = breakpoint + 1;
12156 }
12157
12158 // Since we've hit the terminator of the regular expression,
12159 // we now need to parse the options.
12160 parser->current.end += pm_strspn_regexp_option(parser->current.end, parser->end - parser->current.end);
12161
12162 lex_mode_pop(parser);
12163 lex_state_set(parser, PM_LEX_STATE_END);
12165 }
12166
12167 // If we've hit the incrementor, then we need to skip past it
12168 // and find the next breakpoint.
12169 if (*breakpoint && *breakpoint == lex_mode->as.regexp.incrementor) {
12170 parser->current.end = breakpoint + 1;
12171 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
12172 lex_mode->as.regexp.nesting++;
12173 continue;
12174 }
12175
12176 switch (*breakpoint) {
12177 case '\0':
12178 // If we hit a null byte, skip directly past it.
12179 parser->current.end = breakpoint + 1;
12180 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
12181 break;
12182 case '\r':
12183 if (peek_at(parser, breakpoint + 1) != '\n') {
12184 parser->current.end = breakpoint + 1;
12185 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
12186 break;
12187 }
12188
12189 breakpoint++;
12190 parser->current.end = breakpoint;
12191 pm_regexp_token_buffer_escape(parser, &token_buffer);
12192 token_buffer.base.cursor = breakpoint;
12193
12194 /* fallthrough */
12195 case '\n':
12196 // If we've hit a newline, then we need to track that in
12197 // the list of newlines.
12198 if (parser->heredoc_end == NULL) {
12199 pm_newline_list_append(&parser->newline_list, breakpoint);
12200 parser->current.end = breakpoint + 1;
12201 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
12202 break;
12203 }
12204
12205 parser->current.end = breakpoint + 1;
12206 parser_flush_heredoc_end(parser);
12207 pm_regexp_token_buffer_flush(parser, &token_buffer);
12209 case '\\': {
12210 // If we hit escapes, then we need to treat the next
12211 // token literally. In this case we'll skip past the
12212 // next character and find the next breakpoint.
12213 parser->current.end = breakpoint + 1;
12214
12215 // If we've hit the end of the file, then break out of
12216 // the loop by setting the breakpoint to NULL.
12217 if (parser->current.end == parser->end) {
12218 breakpoint = NULL;
12219 break;
12220 }
12221
12222 pm_regexp_token_buffer_escape(parser, &token_buffer);
12223 uint8_t peeked = peek(parser);
12224
12225 switch (peeked) {
12226 case '\r':
12227 parser->current.end++;
12228 if (peek(parser) != '\n') {
12229 if (lex_mode->as.regexp.terminator != '\r') {
12230 pm_token_buffer_push_byte(&token_buffer.base, '\\');
12231 }
12232 pm_regexp_token_buffer_push_byte(&token_buffer, '\r');
12233 pm_token_buffer_push_byte(&token_buffer.base, '\r');
12234 break;
12235 }
12236 /* fallthrough */
12237 case '\n':
12238 if (parser->heredoc_end) {
12239 // ... if we are on the same line as a heredoc,
12240 // flush the heredoc and continue parsing after
12241 // heredoc_end.
12242 parser_flush_heredoc_end(parser);
12243 pm_regexp_token_buffer_copy(parser, &token_buffer);
12245 } else {
12246 // ... else track the newline.
12247 pm_newline_list_append(&parser->newline_list, parser->current.end);
12248 }
12249
12250 parser->current.end++;
12251 break;
12252 case 'c':
12253 case 'C':
12254 case 'M':
12255 case 'u':
12256 case 'x':
12257 escape_read(parser, &token_buffer.regexp_buffer, &token_buffer.base.buffer, PM_ESCAPE_FLAG_REGEXP);
12258 break;
12259 default:
12260 if (lex_mode->as.regexp.terminator == peeked) {
12261 // Some characters when they are used as the
12262 // terminator also receive an escape. They are
12263 // enumerated here.
12264 switch (peeked) {
12265 case '$': case ')': case '*': case '+':
12266 case '.': case '>': case '?': case ']':
12267 case '^': case '|': case '}':
12268 pm_token_buffer_push_byte(&token_buffer.base, '\\');
12269 break;
12270 default:
12271 break;
12272 }
12273
12274 pm_regexp_token_buffer_push_byte(&token_buffer, peeked);
12275 pm_token_buffer_push_byte(&token_buffer.base, peeked);
12276 parser->current.end++;
12277 break;
12278 }
12279
12280 if (peeked < 0x80) pm_token_buffer_push_byte(&token_buffer.base, '\\');
12281 pm_regexp_token_buffer_push_escaped(&token_buffer, parser);
12282 break;
12283 }
12284
12285 token_buffer.base.cursor = parser->current.end;
12286 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
12287 break;
12288 }
12289 case '#': {
12290 // If we hit a #, then we will attempt to lex
12291 // interpolation.
12292 pm_token_type_t type = lex_interpolation(parser, breakpoint);
12293
12294 if (type == PM_TOKEN_NOT_PROVIDED) {
12295 // If we haven't returned at this point then we had
12296 // something that looked like an interpolated class or
12297 // instance variable like "#@" but wasn't actually. In
12298 // this case we'll just skip to the next breakpoint.
12299 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
12300 break;
12301 }
12302
12304 pm_regexp_token_buffer_flush(parser, &token_buffer);
12305 }
12306
12307 LEX(type);
12308 }
12309 default:
12310 assert(false && "unreachable");
12311 break;
12312 }
12313 }
12314
12315 if (parser->current.end > parser->current.start) {
12316 pm_regexp_token_buffer_flush(parser, &token_buffer);
12318 }
12319
12320 // If we were unable to find a breakpoint, then this token hits the
12321 // end of the file.
12322 parser->current.end = parser->end;
12323 pm_regexp_token_buffer_flush(parser, &token_buffer);
12325 }
12326 case PM_LEX_STRING: {
12327 // First, we'll set the start of this token to be the current end.
12328 if (parser->next_start == NULL) {
12329 parser->current.start = parser->current.end;
12330 } else {
12331 parser->current.start = parser->next_start;
12332 parser->current.end = parser->next_start;
12333 parser->next_start = NULL;
12334 }
12335
12336 // We'll check if we're at the end of the file. If we are, then we need to
12337 // return the EOF token.
12338 if (parser->current.end >= parser->end) {
12339 LEX(PM_TOKEN_EOF);
12340 }
12341
12342 // These are the places where we need to split up the content of the
12343 // string. We'll use strpbrk to find the first of these characters.
12344 pm_lex_mode_t *lex_mode = parser->lex_modes.current;
12345 const uint8_t *breakpoints = lex_mode->as.string.breakpoints;
12346 const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12347
12348 // If we haven't found an escape yet, then this buffer will be
12349 // unallocated since we can refer directly to the source string.
12350 pm_token_buffer_t token_buffer = { 0 };
12351
12352 while (breakpoint != NULL) {
12353 // If we hit the incrementor, then we'll increment the nesting and
12354 // continue lexing.
12355 if (lex_mode->as.string.incrementor != '\0' && *breakpoint == lex_mode->as.string.incrementor) {
12356 lex_mode->as.string.nesting++;
12357 parser->current.end = breakpoint + 1;
12358 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12359 continue;
12360 }
12361
12362 uint8_t term = lex_mode->as.string.terminator;
12363 bool is_terminator = (*breakpoint == term);
12364
12365 // If the terminator is a newline, we need to consider \r\n _also_ a newline.
12366 // For example: `%\nfoo\r\n`
12367 // The string should be "foo", not "foo\r"
12368 if (*breakpoint == '\r' && peek_at(parser, breakpoint + 1) == '\n') {
12369 if (term == '\n') {
12370 is_terminator = true;
12371 }
12372
12373 // If the terminator is a CR, but we see a CRLF, we need to
12374 // treat the CRLF as a newline, meaning this is _not_ the
12375 // terminator
12376 if (term == '\r') {
12377 is_terminator = false;
12378 }
12379 }
12380
12381 // Note that we have to check the terminator here first because we could
12382 // potentially be parsing a % string that has a # character as the
12383 // terminator.
12384 if (is_terminator) {
12385 // If this terminator doesn't actually close the string, then we need
12386 // to continue on past it.
12387 if (lex_mode->as.string.nesting > 0) {
12388 parser->current.end = breakpoint + 1;
12389 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12390 lex_mode->as.string.nesting--;
12391 continue;
12392 }
12393
12394 // Here we've hit the terminator. If we have already consumed content
12395 // then we need to return that content as string content first.
12396 if (breakpoint > parser->current.start) {
12397 parser->current.end = breakpoint;
12398 pm_token_buffer_flush(parser, &token_buffer);
12400 }
12401
12402 // Otherwise we need to switch back to the parent lex mode and
12403 // return the end of the string.
12404 size_t eol_length = match_eol_at(parser, breakpoint);
12405 if (eol_length) {
12406 parser->current.end = breakpoint + eol_length;
12407 pm_newline_list_append(&parser->newline_list, parser->current.end - 1);
12408 } else {
12409 parser->current.end = breakpoint + 1;
12410 }
12411
12412 if (lex_mode->as.string.label_allowed && (peek(parser) == ':') && (peek_offset(parser, 1) != ':')) {
12413 parser->current.end++;
12414 lex_state_set(parser, PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED);
12415 lex_mode_pop(parser);
12416 LEX(PM_TOKEN_LABEL_END);
12417 }
12418
12419 lex_state_set(parser, PM_LEX_STATE_END);
12420 lex_mode_pop(parser);
12422 }
12423
12424 switch (*breakpoint) {
12425 case '\0':
12426 // Skip directly past the null character.
12427 parser->current.end = breakpoint + 1;
12428 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12429 break;
12430 case '\r':
12431 if (peek_at(parser, breakpoint + 1) != '\n') {
12432 parser->current.end = breakpoint + 1;
12433 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12434 break;
12435 }
12436
12437 // If we hit a \r\n sequence, then we need to treat it
12438 // as a newline.
12439 breakpoint++;
12440 parser->current.end = breakpoint;
12441 pm_token_buffer_escape(parser, &token_buffer);
12442 token_buffer.cursor = breakpoint;
12443
12444 /* fallthrough */
12445 case '\n':
12446 // When we hit a newline, we need to flush any potential
12447 // heredocs. Note that this has to happen after we check
12448 // for the terminator in case the terminator is a
12449 // newline character.
12450 if (parser->heredoc_end == NULL) {
12451 pm_newline_list_append(&parser->newline_list, breakpoint);
12452 parser->current.end = breakpoint + 1;
12453 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12454 break;
12455 }
12456
12457 parser->current.end = breakpoint + 1;
12458 parser_flush_heredoc_end(parser);
12459 pm_token_buffer_flush(parser, &token_buffer);
12461 case '\\': {
12462 // Here we hit escapes.
12463 parser->current.end = breakpoint + 1;
12464
12465 // If we've hit the end of the file, then break out of
12466 // the loop by setting the breakpoint to NULL.
12467 if (parser->current.end == parser->end) {
12468 breakpoint = NULL;
12469 continue;
12470 }
12471
12472 pm_token_buffer_escape(parser, &token_buffer);
12473 uint8_t peeked = peek(parser);
12474
12475 switch (peeked) {
12476 case '\\':
12477 pm_token_buffer_push_byte(&token_buffer, '\\');
12478 parser->current.end++;
12479 break;
12480 case '\r':
12481 parser->current.end++;
12482 if (peek(parser) != '\n') {
12483 if (!lex_mode->as.string.interpolation) {
12484 pm_token_buffer_push_byte(&token_buffer, '\\');
12485 }
12486 pm_token_buffer_push_byte(&token_buffer, '\r');
12487 break;
12488 }
12489 /* fallthrough */
12490 case '\n':
12491 if (!lex_mode->as.string.interpolation) {
12492 pm_token_buffer_push_byte(&token_buffer, '\\');
12493 pm_token_buffer_push_byte(&token_buffer, '\n');
12494 }
12495
12496 if (parser->heredoc_end) {
12497 // ... if we are on the same line as a heredoc,
12498 // flush the heredoc and continue parsing after
12499 // heredoc_end.
12500 parser_flush_heredoc_end(parser);
12501 pm_token_buffer_copy(parser, &token_buffer);
12503 } else {
12504 // ... else track the newline.
12505 pm_newline_list_append(&parser->newline_list, parser->current.end);
12506 }
12507
12508 parser->current.end++;
12509 break;
12510 default:
12511 if (lex_mode->as.string.incrementor != '\0' && peeked == lex_mode->as.string.incrementor) {
12512 pm_token_buffer_push_byte(&token_buffer, peeked);
12513 parser->current.end++;
12514 } else if (lex_mode->as.string.terminator != '\0' && peeked == lex_mode->as.string.terminator) {
12515 pm_token_buffer_push_byte(&token_buffer, peeked);
12516 parser->current.end++;
12517 } else if (lex_mode->as.string.interpolation) {
12518 escape_read(parser, &token_buffer.buffer, NULL, PM_ESCAPE_FLAG_NONE);
12519 } else {
12520 pm_token_buffer_push_byte(&token_buffer, '\\');
12521 pm_token_buffer_push_escaped(&token_buffer, parser);
12522 }
12523
12524 break;
12525 }
12526
12527 token_buffer.cursor = parser->current.end;
12528 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12529 break;
12530 }
12531 case '#': {
12532 pm_token_type_t type = lex_interpolation(parser, breakpoint);
12533
12534 if (type == PM_TOKEN_NOT_PROVIDED) {
12535 // If we haven't returned at this point then we had something that
12536 // looked like an interpolated class or instance variable like "#@"
12537 // but wasn't actually. In this case we'll just skip to the next
12538 // breakpoint.
12539 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12540 break;
12541 }
12542
12544 pm_token_buffer_flush(parser, &token_buffer);
12545 }
12546
12547 LEX(type);
12548 }
12549 default:
12550 assert(false && "unreachable");
12551 }
12552 }
12553
12554 if (parser->current.end > parser->current.start) {
12555 pm_token_buffer_flush(parser, &token_buffer);
12557 }
12558
12559 // If we've hit the end of the string, then this is an unterminated
12560 // string. In that case we'll return a string content token.
12561 parser->current.end = parser->end;
12562 pm_token_buffer_flush(parser, &token_buffer);
12564 }
12565 case PM_LEX_HEREDOC: {
12566 // First, we'll set to start of this token.
12567 if (parser->next_start == NULL) {
12568 parser->current.start = parser->current.end;
12569 } else {
12570 parser->current.start = parser->next_start;
12571 parser->current.end = parser->next_start;
12572 parser->heredoc_end = NULL;
12573 parser->next_start = NULL;
12574 }
12575
12576 // Now let's grab the information about the identifier off of the
12577 // current lex mode.
12578 pm_lex_mode_t *lex_mode = parser->lex_modes.current;
12579 pm_heredoc_lex_mode_t *heredoc_lex_mode = &lex_mode->as.heredoc.base;
12580
12581 bool line_continuation = lex_mode->as.heredoc.line_continuation;
12582 lex_mode->as.heredoc.line_continuation = false;
12583
12584 // We'll check if we're at the end of the file. If we are, then we
12585 // will add an error (because we weren't able to find the
12586 // terminator) but still continue parsing so that content after the
12587 // declaration of the heredoc can be parsed.
12588 if (parser->current.end >= parser->end) {
12589 pm_parser_err_heredoc_term(parser, heredoc_lex_mode->ident_start, heredoc_lex_mode->ident_length);
12590 parser->next_start = lex_mode->as.heredoc.next_start;
12591 parser->heredoc_end = parser->current.end;
12592 lex_state_set(parser, PM_LEX_STATE_END);
12593 lex_mode_pop(parser);
12595 }
12596
12597 const uint8_t *ident_start = heredoc_lex_mode->ident_start;
12598 size_t ident_length = heredoc_lex_mode->ident_length;
12599
12600 // If we are immediately following a newline and we have hit the
12601 // terminator, then we need to return the ending of the heredoc.
12602 if (current_token_starts_line(parser)) {
12603 const uint8_t *start = parser->current.start;
12604
12605 if (!line_continuation && (start + ident_length <= parser->end)) {
12606 const uint8_t *newline = next_newline(start, parser->end - start);
12607 const uint8_t *ident_end = newline;
12608 const uint8_t *terminator_end = newline;
12609
12610 if (newline == NULL) {
12611 terminator_end = parser->end;
12612 ident_end = parser->end;
12613 } else {
12614 terminator_end++;
12615 if (newline[-1] == '\r') {
12616 ident_end--; // Remove \r
12617 }
12618 }
12619
12620 const uint8_t *terminator_start = ident_end - ident_length;
12621 const uint8_t *cursor = start;
12622
12623 if (heredoc_lex_mode->indent == PM_HEREDOC_INDENT_DASH || heredoc_lex_mode->indent == PM_HEREDOC_INDENT_TILDE) {
12624 while (cursor < terminator_start && pm_char_is_inline_whitespace(*cursor)) {
12625 cursor++;
12626 }
12627 }
12628
12629 if (
12630 (cursor == terminator_start) &&
12631 (memcmp(terminator_start, ident_start, ident_length) == 0)
12632 ) {
12633 if (newline != NULL) {
12634 pm_newline_list_append(&parser->newline_list, newline);
12635 }
12636
12637 parser->current.end = terminator_end;
12638 if (*lex_mode->as.heredoc.next_start == '\\') {
12639 parser->next_start = NULL;
12640 } else {
12641 parser->next_start = lex_mode->as.heredoc.next_start;
12642 parser->heredoc_end = parser->current.end;
12643 }
12644
12645 lex_state_set(parser, PM_LEX_STATE_END);
12646 lex_mode_pop(parser);
12648 }
12649 }
12650
12651 size_t whitespace = pm_heredoc_strspn_inline_whitespace(parser, &start, heredoc_lex_mode->indent);
12652 if (
12653 heredoc_lex_mode->indent == PM_HEREDOC_INDENT_TILDE &&
12654 lex_mode->as.heredoc.common_whitespace != NULL &&
12655 (*lex_mode->as.heredoc.common_whitespace > whitespace) &&
12656 peek_at(parser, start) != '\n'
12657 ) {
12658 *lex_mode->as.heredoc.common_whitespace = whitespace;
12659 }
12660 }
12661
12662 // Otherwise we'll be parsing string content. These are the places
12663 // where we need to split up the content of the heredoc. We'll use
12664 // strpbrk to find the first of these characters.
12665 uint8_t breakpoints[] = "\r\n\\#";
12666
12667 pm_heredoc_quote_t quote = heredoc_lex_mode->quote;
12668 if (quote == PM_HEREDOC_QUOTE_SINGLE) {
12669 breakpoints[3] = '\0';
12670 }
12671
12672 const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12673 pm_token_buffer_t token_buffer = { 0 };
12674 bool was_line_continuation = false;
12675
12676 while (breakpoint != NULL) {
12677 switch (*breakpoint) {
12678 case '\0':
12679 // Skip directly past the null character.
12680 parser->current.end = breakpoint + 1;
12681 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12682 break;
12683 case '\r':
12684 parser->current.end = breakpoint + 1;
12685
12686 if (peek_at(parser, breakpoint + 1) != '\n') {
12687 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12688 break;
12689 }
12690
12691 // If we hit a \r\n sequence, then we want to replace it
12692 // with a single \n character in the final string.
12693 breakpoint++;
12694 pm_token_buffer_escape(parser, &token_buffer);
12695 token_buffer.cursor = breakpoint;
12696
12697 /* fallthrough */
12698 case '\n': {
12699 if (parser->heredoc_end != NULL && (parser->heredoc_end > breakpoint)) {
12700 parser_flush_heredoc_end(parser);
12701 parser->current.end = breakpoint + 1;
12702 pm_token_buffer_flush(parser, &token_buffer);
12704 }
12705
12706 pm_newline_list_append(&parser->newline_list, breakpoint);
12707
12708 // If we have a - or ~ heredoc, then we can match after
12709 // some leading whitespace.
12710 const uint8_t *start = breakpoint + 1;
12711
12712 if (!was_line_continuation && (start + ident_length <= parser->end)) {
12713 // We want to match the terminator starting from the end of the line in case
12714 // there is whitespace in the ident such as <<-' DOC' or <<~' DOC'.
12715 const uint8_t *newline = next_newline(start, parser->end - start);
12716
12717 if (newline == NULL) {
12718 newline = parser->end;
12719 } else if (newline[-1] == '\r') {
12720 newline--; // Remove \r
12721 }
12722
12723 // Start of a possible terminator.
12724 const uint8_t *terminator_start = newline - ident_length;
12725
12726 // Cursor to check for the leading whitespace. We skip the
12727 // leading whitespace if we have a - or ~ heredoc.
12728 const uint8_t *cursor = start;
12729
12730 if (heredoc_lex_mode->indent == PM_HEREDOC_INDENT_DASH || heredoc_lex_mode->indent == PM_HEREDOC_INDENT_TILDE) {
12731 while (cursor < terminator_start && pm_char_is_inline_whitespace(*cursor)) {
12732 cursor++;
12733 }
12734 }
12735
12736 if (
12737 cursor == terminator_start &&
12738 (memcmp(terminator_start, ident_start, ident_length) == 0)
12739 ) {
12740 parser->current.end = breakpoint + 1;
12741 pm_token_buffer_flush(parser, &token_buffer);
12743 }
12744 }
12745
12746 size_t whitespace = pm_heredoc_strspn_inline_whitespace(parser, &start, lex_mode->as.heredoc.base.indent);
12747
12748 // If we have hit a newline that is followed by a valid
12749 // terminator, then we need to return the content of the
12750 // heredoc here as string content. Then, the next time a
12751 // token is lexed, it will match again and return the
12752 // end of the heredoc.
12753 if (lex_mode->as.heredoc.base.indent == PM_HEREDOC_INDENT_TILDE) {
12754 if ((lex_mode->as.heredoc.common_whitespace != NULL) && (*lex_mode->as.heredoc.common_whitespace > whitespace) && peek_at(parser, start) != '\n') {
12755 *lex_mode->as.heredoc.common_whitespace = whitespace;
12756 }
12757
12758 parser->current.end = breakpoint + 1;
12759 pm_token_buffer_flush(parser, &token_buffer);
12761 }
12762
12763 // Otherwise we hit a newline and it wasn't followed by
12764 // a terminator, so we can continue parsing.
12765 parser->current.end = breakpoint + 1;
12766 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12767 break;
12768 }
12769 case '\\': {
12770 // If we hit an escape, then we need to skip past
12771 // however many characters the escape takes up. However
12772 // it's important that if \n or \r\n are escaped, we
12773 // stop looping before the newline and not after the
12774 // newline so that we can still potentially find the
12775 // terminator of the heredoc.
12776 parser->current.end = breakpoint + 1;
12777
12778 // If we've hit the end of the file, then break out of
12779 // the loop by setting the breakpoint to NULL.
12780 if (parser->current.end == parser->end) {
12781 breakpoint = NULL;
12782 continue;
12783 }
12784
12785 pm_token_buffer_escape(parser, &token_buffer);
12786 uint8_t peeked = peek(parser);
12787
12788 if (quote == PM_HEREDOC_QUOTE_SINGLE) {
12789 switch (peeked) {
12790 case '\r':
12791 parser->current.end++;
12792 if (peek(parser) != '\n') {
12793 pm_token_buffer_push_byte(&token_buffer, '\\');
12794 pm_token_buffer_push_byte(&token_buffer, '\r');
12795 break;
12796 }
12797 /* fallthrough */
12798 case '\n':
12799 pm_token_buffer_push_byte(&token_buffer, '\\');
12800 pm_token_buffer_push_byte(&token_buffer, '\n');
12801 token_buffer.cursor = parser->current.end + 1;
12802 breakpoint = parser->current.end;
12803 continue;
12804 default:
12805 pm_token_buffer_push_byte(&token_buffer, '\\');
12806 pm_token_buffer_push_escaped(&token_buffer, parser);
12807 break;
12808 }
12809 } else {
12810 switch (peeked) {
12811 case '\r':
12812 parser->current.end++;
12813 if (peek(parser) != '\n') {
12814 pm_token_buffer_push_byte(&token_buffer, '\r');
12815 break;
12816 }
12817 /* fallthrough */
12818 case '\n':
12819 // If we are in a tilde here, we should
12820 // break out of the loop and return the
12821 // string content.
12822 if (heredoc_lex_mode->indent == PM_HEREDOC_INDENT_TILDE) {
12823 const uint8_t *end = parser->current.end;
12824 pm_newline_list_append(&parser->newline_list, end);
12825
12826 // Here we want the buffer to only
12827 // include up to the backslash.
12828 parser->current.end = breakpoint;
12829 pm_token_buffer_flush(parser, &token_buffer);
12830
12831 // Now we can advance the end of the
12832 // token past the newline.
12833 parser->current.end = end + 1;
12834 lex_mode->as.heredoc.line_continuation = true;
12836 }
12837
12838 was_line_continuation = true;
12839 token_buffer.cursor = parser->current.end + 1;
12840 breakpoint = parser->current.end;
12841 continue;
12842 default:
12843 escape_read(parser, &token_buffer.buffer, NULL, PM_ESCAPE_FLAG_NONE);
12844 break;
12845 }
12846 }
12847
12848 token_buffer.cursor = parser->current.end;
12849 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12850 break;
12851 }
12852 case '#': {
12853 pm_token_type_t type = lex_interpolation(parser, breakpoint);
12854
12855 if (type == PM_TOKEN_NOT_PROVIDED) {
12856 // If we haven't returned at this point then we had
12857 // something that looked like an interpolated class
12858 // or instance variable like "#@" but wasn't
12859 // actually. In this case we'll just skip to the
12860 // next breakpoint.
12861 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12862 break;
12863 }
12864
12866 pm_token_buffer_flush(parser, &token_buffer);
12867 }
12868
12869 LEX(type);
12870 }
12871 default:
12872 assert(false && "unreachable");
12873 }
12874
12875 was_line_continuation = false;
12876 }
12877
12878 if (parser->current.end > parser->current.start) {
12879 parser->current.end = parser->end;
12880 pm_token_buffer_flush(parser, &token_buffer);
12882 }
12883
12884 // If we've hit the end of the string, then this is an unterminated
12885 // heredoc. In that case we'll return a string content token.
12886 parser->current.end = parser->end;
12887 pm_token_buffer_flush(parser, &token_buffer);
12889 }
12890 }
12891
12892 assert(false && "unreachable");
12893}
12894
12895#undef LEX
12896
12897/******************************************************************************/
12898/* Parse functions */
12899/******************************************************************************/
12900
/**
 * The binding power levels used when parsing expressions, listed in ascending
 * order. Every level is an even number; the initializers for left-associative
 * and non-associative operators use `precedence + 1` as the right-hand power,
 * which lands on the odd value in between two levels.
 */
typedef enum {
    /** Indicates that a token cannot be used as an infix operator. */
    PM_BINDING_POWER_UNSET = 0,
    /** Statement level. */
    PM_BINDING_POWER_STATEMENT = 2,
    /** rescue (modifier form) */
    PM_BINDING_POWER_MODIFIER_RESCUE = 4,
    /** if unless until while (modifier forms) */
    PM_BINDING_POWER_MODIFIER = 6,
    /** and or */
    PM_BINDING_POWER_COMPOSITION = 8,
    /** not */
    PM_BINDING_POWER_NOT = 10,
    /** => in */
    PM_BINDING_POWER_MATCH = 12,
    /** defined? */
    PM_BINDING_POWER_DEFINED = 14,
    /** = (multiple assignment) */
    PM_BINDING_POWER_MULTI_ASSIGNMENT = 16,
    /** = += -= *= /= %= &= |= ^= &&= ||= <<= >>= **= */
    PM_BINDING_POWER_ASSIGNMENT = 18,
    /** ?: */
    PM_BINDING_POWER_TERNARY = 20,
    /** .. ... */
    PM_BINDING_POWER_RANGE = 22,
    /** || */
    PM_BINDING_POWER_LOGICAL_OR = 24,
    /** && */
    PM_BINDING_POWER_LOGICAL_AND = 26,
    /** <=> == === != =~ !~ */
    PM_BINDING_POWER_EQUALITY = 28,
    /** > >= < <= */
    PM_BINDING_POWER_COMPARISON = 30,
    /** | ^ */
    PM_BINDING_POWER_BITWISE_OR = 32,
    /** & */
    PM_BINDING_POWER_BITWISE_AND = 34,
    /** << >> */
    PM_BINDING_POWER_SHIFT = 36,
    /** + - */
    PM_BINDING_POWER_TERM = 38,
    /** * / % */
    PM_BINDING_POWER_FACTOR = 40,
    /** -@ */
    PM_BINDING_POWER_UMINUS = 42,
    /** ** */
    PM_BINDING_POWER_EXPONENT = 44,
    /** ! ~ +@ */
    PM_BINDING_POWER_UNARY = 46,
    /** [] []= */
    PM_BINDING_POWER_INDEX = 48,
    /** :: . */
    PM_BINDING_POWER_CALL = 50,
    /** The highest value in the enumeration. */
    PM_BINDING_POWER_MAX = 52
} pm_binding_power_t;
12938
/**
 * Per-token record stored in the binding power table: the binding power a
 * token has on its left-hand side and on its right-hand side.
 *
 * NOTE(review): this definition looks truncated in this copy of the file. No
 * closing brace or type name is visible, and the table initializers supply
 * four values per entry (two powers followed by two booleans) while only two
 * members appear here. Confirm the remaining members against upstream.
 */
12943typedef struct {
// Binding power on the left-hand side of the token.
12945 pm_binding_power_t left;
12946
// Binding power on the right-hand side of the token.
12948 pm_binding_power_t right;
12949
12952
12959
/*
 * Shorthand initializers for the entries of the binding power table. Each one
 * expands to a brace-enclosed list of four values: the left power, the right
 * power, and two boolean flags. The first flag is false only for
 * RIGHT_ASSOCIATIVE_UNARY and the second flag is true only for
 * NON_ASSOCIATIVE.
 *
 * The `precedence` argument is parenthesized everywhere it is used so that an
 * argument built from a lower-precedence operator (for example `1 << 2`)
 * still has `+ 1` applied to the whole value.
 */
#define BINDING_POWER_ASSIGNMENT { PM_BINDING_POWER_UNARY, PM_BINDING_POWER_ASSIGNMENT, true, false }
#define LEFT_ASSOCIATIVE(precedence) { (precedence), (precedence) + 1, true, false }
#define RIGHT_ASSOCIATIVE(precedence) { (precedence), (precedence), true, false }
#define NON_ASSOCIATIVE(precedence) { (precedence), (precedence) + 1, true, true }
#define RIGHT_ASSOCIATIVE_UNARY(precedence) { (precedence), (precedence), false, false }
12965
12966pm_binding_powers_t pm_binding_powers[PM_TOKEN_MAXIMUM] = {
12967 // rescue
12968 [PM_TOKEN_KEYWORD_RESCUE_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER_RESCUE),
12969
12970 // if unless until while
12971 [PM_TOKEN_KEYWORD_IF_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER),
12972 [PM_TOKEN_KEYWORD_UNLESS_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER),
12973 [PM_TOKEN_KEYWORD_UNTIL_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER),
12974 [PM_TOKEN_KEYWORD_WHILE_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER),
12975
12976 // and or
12977 [PM_TOKEN_KEYWORD_AND] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPOSITION),
12978 [PM_TOKEN_KEYWORD_OR] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPOSITION),
12979
12980 // => in
12981 [PM_TOKEN_EQUAL_GREATER] = NON_ASSOCIATIVE(PM_BINDING_POWER_MATCH),
12982 [PM_TOKEN_KEYWORD_IN] = NON_ASSOCIATIVE(PM_BINDING_POWER_MATCH),
12983
12984 // &&= &= ^= = >>= <<= -= %= |= ||= += /= *= **=
12985 [PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL] = BINDING_POWER_ASSIGNMENT,
12986 [PM_TOKEN_AMPERSAND_EQUAL] = BINDING_POWER_ASSIGNMENT,
12987 [PM_TOKEN_CARET_EQUAL] = BINDING_POWER_ASSIGNMENT,
12988 [PM_TOKEN_EQUAL] = BINDING_POWER_ASSIGNMENT,
12989 [PM_TOKEN_GREATER_GREATER_EQUAL] = BINDING_POWER_ASSIGNMENT,
12990 [PM_TOKEN_LESS_LESS_EQUAL] = BINDING_POWER_ASSIGNMENT,
12991 [PM_TOKEN_MINUS_EQUAL] = BINDING_POWER_ASSIGNMENT,
12992 [PM_TOKEN_PERCENT_EQUAL] = BINDING_POWER_ASSIGNMENT,
12993 [PM_TOKEN_PIPE_EQUAL] = BINDING_POWER_ASSIGNMENT,
12994 [PM_TOKEN_PIPE_PIPE_EQUAL] = BINDING_POWER_ASSIGNMENT,
12995 [PM_TOKEN_PLUS_EQUAL] = BINDING_POWER_ASSIGNMENT,
12996 [PM_TOKEN_SLASH_EQUAL] = BINDING_POWER_ASSIGNMENT,
12997 [PM_TOKEN_STAR_EQUAL] = BINDING_POWER_ASSIGNMENT,
12998 [PM_TOKEN_STAR_STAR_EQUAL] = BINDING_POWER_ASSIGNMENT,
12999
13000 // ?:
13001 [PM_TOKEN_QUESTION_MARK] = RIGHT_ASSOCIATIVE(PM_BINDING_POWER_TERNARY),
13002
13003 // .. ...
13004 [PM_TOKEN_DOT_DOT] = NON_ASSOCIATIVE(PM_BINDING_POWER_RANGE),
13005 [PM_TOKEN_DOT_DOT_DOT] = NON_ASSOCIATIVE(PM_BINDING_POWER_RANGE),
13006 [PM_TOKEN_UDOT_DOT] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_LOGICAL_OR),
13007 [PM_TOKEN_UDOT_DOT_DOT] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_LOGICAL_OR),
13008
13009 // ||
13010 [PM_TOKEN_PIPE_PIPE] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_LOGICAL_OR),
13011
13012 // &&
13013 [PM_TOKEN_AMPERSAND_AMPERSAND] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_LOGICAL_AND),
13014
13015 // != !~ == === =~ <=>
13016 [PM_TOKEN_BANG_EQUAL] = NON_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
13017 [PM_TOKEN_BANG_TILDE] = NON_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
13018 [PM_TOKEN_EQUAL_EQUAL] = NON_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
13019 [PM_TOKEN_EQUAL_EQUAL_EQUAL] = NON_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
13020 [PM_TOKEN_EQUAL_TILDE] = NON_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
13021 [PM_TOKEN_LESS_EQUAL_GREATER] = NON_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
13022
13023 // > >= < <=
13024 [PM_TOKEN_GREATER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPARISON),
13025 [PM_TOKEN_GREATER_EQUAL] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPARISON),
13026 [PM_TOKEN_LESS] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPARISON),
13027 [PM_TOKEN_LESS_EQUAL] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPARISON),
13028
13029 // ^ |
13030 [PM_TOKEN_CARET] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_BITWISE_OR),
13031 [PM_TOKEN_PIPE] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_BITWISE_OR),
13032
13033 // &
13034 [PM_TOKEN_AMPERSAND] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_BITWISE_AND),
13035
13036 // >> <<
13037 [PM_TOKEN_GREATER_GREATER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_SHIFT),
13038 [PM_TOKEN_LESS_LESS] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_SHIFT),
13039
13040 // - +
13041 [PM_TOKEN_MINUS] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_TERM),
13042 [PM_TOKEN_PLUS] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_TERM),
13043
13044 // % / *
13045 [PM_TOKEN_PERCENT] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_FACTOR),
13046 [PM_TOKEN_SLASH] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_FACTOR),
13047 [PM_TOKEN_STAR] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_FACTOR),
13048 [PM_TOKEN_USTAR] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_FACTOR),
13049
13050 // -@
13051 [PM_TOKEN_UMINUS] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_UMINUS),
13052 [PM_TOKEN_UMINUS_NUM] = { PM_BINDING_POWER_UMINUS, PM_BINDING_POWER_MAX, false, false },
13053
13054 // **
13055 [PM_TOKEN_STAR_STAR] = RIGHT_ASSOCIATIVE(PM_BINDING_POWER_EXPONENT),
13056 [PM_TOKEN_USTAR_STAR] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_UNARY),
13057
13058 // ! ~ +@
13059 [PM_TOKEN_BANG] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_UNARY),
13060 [PM_TOKEN_TILDE] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_UNARY),
13061 [PM_TOKEN_UPLUS] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_UNARY),
13062
13063 // [
13064 [PM_TOKEN_BRACKET_LEFT] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_INDEX),
13065
13066 // :: . &.
13067 [PM_TOKEN_COLON_COLON] = RIGHT_ASSOCIATIVE(PM_BINDING_POWER_CALL),
13068 [PM_TOKEN_DOT] = RIGHT_ASSOCIATIVE(PM_BINDING_POWER_CALL),
13069 [PM_TOKEN_AMPERSAND_DOT] = RIGHT_ASSOCIATIVE(PM_BINDING_POWER_CALL)
13070};
13071
// The initializer helpers are only meant for the binding power table, so all
// of them, including NON_ASSOCIATIVE, are removed once the table is defined.
#undef BINDING_POWER_ASSIGNMENT
#undef LEFT_ASSOCIATIVE
#undef RIGHT_ASSOCIATIVE
#undef NON_ASSOCIATIVE
#undef RIGHT_ASSOCIATIVE_UNARY
13076
13080static inline bool
13081match1(const pm_parser_t *parser, pm_token_type_t type) {
13082 return parser->current.type == type;
13083}
13084
13088static inline bool
13089match2(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2) {
13090 return match1(parser, type1) || match1(parser, type2);
13091}
13092
13096static inline bool
13097match3(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3) {
13098 return match1(parser, type1) || match1(parser, type2) || match1(parser, type3);
13099}
13100
13104static inline bool
13105match4(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3, pm_token_type_t type4) {
13106 return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4);
13107}
13108
13112static inline bool
13113match7(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3, pm_token_type_t type4, pm_token_type_t type5, pm_token_type_t type6, pm_token_type_t type7) {
13114 return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4) || match1(parser, type5) || match1(parser, type6) || match1(parser, type7);
13115}
13116
13120static inline bool
13121match8(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3, pm_token_type_t type4, pm_token_type_t type5, pm_token_type_t type6, pm_token_type_t type7, pm_token_type_t type8) {
13122 return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4) || match1(parser, type5) || match1(parser, type6) || match1(parser, type7) || match1(parser, type8);
13123}
13124
13128static inline bool
13129match9(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3, pm_token_type_t type4, pm_token_type_t type5, pm_token_type_t type6, pm_token_type_t type7, pm_token_type_t type8, pm_token_type_t type9) {
13130 return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4) || match1(parser, type5) || match1(parser, type6) || match1(parser, type7) || match1(parser, type8) || match1(parser, type9);
13131}
13132
13139static bool
13140accept1(pm_parser_t *parser, pm_token_type_t type) {
13141 if (match1(parser, type)) {
13142 parser_lex(parser);
13143 return true;
13144 }
13145 return false;
13146}
13147
13152static inline bool
13153accept2(pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2) {
13154 if (match2(parser, type1, type2)) {
13155 parser_lex(parser);
13156 return true;
13157 }
13158 return false;
13159}
13160
13172static void
13173expect1(pm_parser_t *parser, pm_token_type_t type, pm_diagnostic_id_t diag_id) {
13174 if (accept1(parser, type)) return;
13175
13176 const uint8_t *location = parser->previous.end;
13177 pm_parser_err(parser, location, location, diag_id);
13178
13179 parser->previous.start = location;
13180 parser->previous.type = PM_TOKEN_MISSING;
13181}
13182
13187static void
13188expect2(pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_diagnostic_id_t diag_id) {
13189 if (accept2(parser, type1, type2)) return;
13190
13191 const uint8_t *location = parser->previous.end;
13192 pm_parser_err(parser, location, location, diag_id);
13193
13194 parser->previous.start = location;
13195 parser->previous.type = PM_TOKEN_MISSING;
13196}
13197
13202static void
13203expect1_heredoc_term(pm_parser_t *parser, const uint8_t *ident_start, size_t ident_length) {
13204 if (match1(parser, PM_TOKEN_HEREDOC_END)) {
13205 parser_lex(parser);
13206 } else {
13207 pm_parser_err_heredoc_term(parser, ident_start, ident_length);
13208 parser->previous.start = parser->previous.end;
13209 parser->previous.type = PM_TOKEN_MISSING;
13210 }
13211}
13212
13213static pm_node_t *
13214parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, bool accepts_label, pm_diagnostic_id_t diag_id, uint16_t depth);
13215
13220static pm_node_t *
13221parse_value_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, bool accepts_label, pm_diagnostic_id_t diag_id, uint16_t depth) {
13222 pm_node_t *node = parse_expression(parser, binding_power, accepts_command_call, accepts_label, diag_id, depth);
13223 pm_assert_value_expression(parser, node);
13224 return node;
13225}
13226
/**
 * Decide whether a token of the given type is treated as one that can begin an
 * expression. The decision is driven by the binding powers table: outside of
 * the special cases below, a token qualifies exactly when it has no left
 * binding power.
 *
 * @param type The token type to classify.
 * @return true if the token is treated as beginning an expression.
 */
13245static inline bool
13246token_begins_expression_p(pm_token_type_t type) {
13247 switch (type) {
// NOTE(review): no case label is visible ahead of the following return in
// this copy of the file, which would leave it unreachable. Confirm the
// intended labels against upstream.
13250 // We need to special case this because it is a binary operator that
13251 // should not be marked as beginning an expression.
13252 return false;
13255 case PM_TOKEN_COLON:
13256 case PM_TOKEN_COMMA:
13258 case PM_TOKEN_EOF:
13269 case PM_TOKEN_NEWLINE:
13271 case PM_TOKEN_SEMICOLON:
13272 // The reason we need this short-circuit is because we're using the
13273 // binding powers table to tell us if the subsequent token could
13274 // potentially be the start of an expression. If there _is_ a binding
13275 // power for one of these tokens, then we should remove it from this list
13276 // and let it be handled by the default case below.
13277 assert(pm_binding_powers[type].left == PM_BINDING_POWER_UNSET);
13278 return false;
// NOTE(review): as above, the case label for this special case is not visible
// here, so this return directly follows another return. Confirm against
// upstream.
13280 // This is a special case because this unary operator cannot appear
13281 // as a general operator, it only appears in certain circumstances.
13282 return false;
13284 case PM_TOKEN_UMINUS:
13286 case PM_TOKEN_UPLUS:
13287 case PM_TOKEN_BANG:
13288 case PM_TOKEN_TILDE:
13289 case PM_TOKEN_UDOT_DOT:
13291 // These unary tokens actually do have binding power associated with them
13292 // so that we can correctly place them into the precedence order. But we
13293 // want them to be marked as beginning an expression, so we need to
13294 // special case them here.
13295 return true;
13296 default:
// No special case applies: the token qualifies only if its left binding
// power is unset.
13297 return pm_binding_powers[type].left == PM_BINDING_POWER_UNSET;
13298 }
13299}
13300
13305static pm_node_t *
13306parse_starred_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, pm_diagnostic_id_t diag_id, uint16_t depth) {
13307 if (accept1(parser, PM_TOKEN_USTAR)) {
13308 pm_token_t operator = parser->previous;
13309 pm_node_t *expression = parse_value_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
13310 return (pm_node_t *) pm_splat_node_create(parser, &operator, expression);
13311 }
13312
13313 return parse_value_expression(parser, binding_power, accepts_command_call, false, diag_id, depth);
13314}
13315
13320static void
13321parse_write_name(pm_parser_t *parser, pm_constant_id_t *name_field) {
13322 // The method name needs to change. If we previously had
13323 // foo, we now need foo=. In this case we'll allocate a new
13324 // owned string, copy the previous method name in, and
13325 // append an =.
13326 pm_constant_t *constant = pm_constant_pool_id_to_constant(&parser->constant_pool, *name_field);
13327 size_t length = constant->length;
13328 uint8_t *name = xcalloc(length + 1, sizeof(uint8_t));
13329 if (name == NULL) return;
13330
13331 memcpy(name, constant->start, length);
13332 name[length] = '=';
13333
13334 // Now switch the name to the new string.
13335 // This silences clang analyzer warning about leak of memory pointed by `name`.
13336 // NOLINTNEXTLINE(clang-analyzer-*)
13337 *name_field = pm_constant_pool_insert_owned(&parser->constant_pool, name, length + 1);
13338}
13339
13346static pm_node_t *
13347parse_unwriteable_target(pm_parser_t *parser, pm_node_t *target) {
13348 switch (PM_NODE_TYPE(target)) {
13349 case PM_SOURCE_ENCODING_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_ENCODING); break;
13350 case PM_FALSE_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_FALSE); break;
13351 case PM_SOURCE_FILE_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_FILE); break;
13352 case PM_SOURCE_LINE_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_LINE); break;
13353 case PM_NIL_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_NIL); break;
13354 case PM_SELF_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_SELF); break;
13355 case PM_TRUE_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_TRUE); break;
13356 default: break;
13357 }
13358
13359 pm_constant_id_t name = pm_parser_constant_id_location(parser, target->location.start, target->location.end);
13360 pm_local_variable_target_node_t *result = pm_local_variable_target_node_create(parser, &target->location, name, 0);
13361
13362 pm_node_destroy(parser, target);
13363 return (pm_node_t *) result;
13364}
13365
13371static void
13372parse_target_implicit_parameter(pm_parser_t *parser, pm_node_t *node) {
13373 pm_node_list_t *implicit_parameters = &parser->current_scope->implicit_parameters;
13374
13375 for (size_t index = 0; index < implicit_parameters->size; index++) {
13376 if (implicit_parameters->nodes[index] == node) {
13377 // If the node is not the last one in the list, we need to shift the
13378 // remaining nodes down to fill the gap. This is extremely unlikely
13379 // to happen.
13380 if (index != implicit_parameters->size - 1) {
13381 memcpy(&implicit_parameters->nodes[index], &implicit_parameters->nodes[index + 1], (implicit_parameters->size - index - 1) * sizeof(pm_node_t *));
13382 }
13383
13384 implicit_parameters->size--;
13385 break;
13386 }
13387 }
13388}
13389
/**
 * Convert an already-parsed expression node into a node that can appear as an
 * assignment target. Depending on the node type, the visible branches either
 * return the node unchanged, retag it in place, replace it with a newly
 * created target node (destroying the original), or report
 * PM_ERR_WRITE_TARGET_UNEXPECTED and return the node as-is.
 *
 * `multiple` is forwarded when recursing into a splat's expression and is used
 * to reject safe navigation on call targets. `splat_parent` is passed as true
 * for that recursion, and one visible branch reports
 * PM_ERR_WRITE_TARGET_UNEXPECTED when it is set.
 *
 * NOTE(review): several case labels (and possibly statements) are not visible
 * in this listing, e.g. the statements that follow the PM_TRUE_NODE case have
 * no label. The full mapping from node type to behavior cannot be read from
 * here; confirm against the complete file.
 */
13398static pm_node_t *
13399parse_target(pm_parser_t *parser, pm_node_t *target, bool multiple, bool splat_parent) {
13400 switch (PM_NODE_TYPE(target)) {
13401 case PM_MISSING_NODE:
13402 return target;
13404 case PM_FALSE_NODE:
13407 case PM_NIL_NODE:
13408 case PM_SELF_NODE:
13409 case PM_TRUE_NODE: {
13410 // In these special cases, we have specific error messages and we
13411 // will replace them with local variable writes.
13412 return parse_unwriteable_target(parser, target);
13413 }
13417 return target;
13419 if (context_def_p(parser)) {
13420 pm_parser_err_node(parser, target, PM_ERR_WRITE_TARGET_IN_METHOD);
13421 }
13422
13425
13426 return target;
13428 if (context_def_p(parser)) {
13429 pm_parser_err_node(parser, target, PM_ERR_WRITE_TARGET_IN_METHOD);
13430 }
13431
// The node is retagged in place; the assert checks that the read and target
// node structs have the same size.
13432 assert(sizeof(pm_constant_target_node_t) == sizeof(pm_constant_read_node_t));
13433 target->type = PM_CONSTANT_TARGET_NODE;
13434
13435 return target;
13438 PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, target, PM_ERR_WRITE_TARGET_READONLY);
13439 return target;
13443 return target;
13445 if (pm_token_is_numbered_parameter(target->location.start, target->location.end)) {
13446 PM_PARSER_ERR_FORMAT(parser, target->location.start, target->location.end, PM_ERR_PARAMETER_NUMBERED_RESERVED, target->location.start);
13447 parse_target_implicit_parameter(parser, target);
13448 }
13449
13450 const pm_local_variable_read_node_t *cast = (const pm_local_variable_read_node_t *) target;
13451 uint32_t name = cast->name;
13452 uint32_t depth = cast->depth;
13453 pm_locals_unread(&pm_parser_scope_find(parser, depth)->locals, name);
13454
13457
13458 return target;
13459 }
13461 pm_constant_id_t name = pm_parser_local_add_constant(parser, "it", 2);
13462 pm_node_t *node = (pm_node_t *) pm_local_variable_target_node_create(parser, &target->location, name, 0);
13463
13464 parse_target_implicit_parameter(parser, target);
13465 pm_node_destroy(parser, target);
13466
13467 return node;
13468 }
13472 return target;
13474 if (splat_parent) {
13475 // Multi target is not accepted in all positions. If this is one
13476 // of them, then we need to add an error.
13477 pm_parser_err_node(parser, target, PM_ERR_WRITE_TARGET_UNEXPECTED);
13478 }
13479
13480 return target;
13481 case PM_SPLAT_NODE: {
13482 pm_splat_node_t *splat = (pm_splat_node_t *) target;
13483
// An anonymous splat has no expression to convert; otherwise the expression
// is converted recursively with splat_parent set to true.
13484 if (splat->expression != NULL) {
13485 splat->expression = parse_target(parser, splat->expression, multiple, true);
13486 }
13487
13488 return (pm_node_t *) splat;
13489 }
13490 case PM_CALL_NODE: {
13491 pm_call_node_t *call = (pm_call_node_t *) target;
13492
13493 // If we have no arguments to the call node and we need this to be a
13494 // target then this is either a method call or a local variable
13495 // write.
13496 if (
13497 (call->message_loc.start != NULL) &&
13498 (call->message_loc.end[-1] != '!') &&
13499 (call->message_loc.end[-1] != '?') &&
13500 (call->opening_loc.start == NULL) &&
13501 (call->arguments == NULL) &&
13502 (call->block == NULL)
13503 ) {
13504 if (call->receiver == NULL) {
13505 // When we get here, we have a local variable write, because it
13506 // was previously marked as a method call but now we have an =.
13507 // This looks like:
13508 //
13509 // foo = 1
13510 //
13511 // When it was parsed in the prefix position, foo was seen as a
13512 // method call with no receiver and no arguments. Now we have an
13513 // =, so we know it's a local variable write.
// The location is copied out first because the call node is destroyed before
// the replacement node is created from it.
13514 const pm_location_t message_loc = call->message_loc;
13515
13516 pm_constant_id_t name = pm_parser_local_add_location(parser, message_loc.start, message_loc.end, 0);
13517 pm_node_destroy(parser, target);
13518
13519 return (pm_node_t *) pm_local_variable_target_node_create(parser, &message_loc, name, 0);
13520 }
13521
13522 if (*call->message_loc.start == '_' || parser->encoding->alnum_char(call->message_loc.start, call->message_loc.end - call->message_loc.start)) {
13523 if (multiple && PM_NODE_FLAG_P(call, PM_CALL_NODE_FLAGS_SAFE_NAVIGATION)) {
13524 pm_parser_err_node(parser, (const pm_node_t *) call, PM_ERR_UNEXPECTED_SAFE_NAVIGATION);
13525 }
13526
// Rename the call to its setter form (name followed by '=') and wrap it in a
// call target node.
13527 parse_write_name(parser, &call->name);
13528 return (pm_node_t *) pm_call_target_node_create(parser, call);
13529 }
13530 }
13531
13532 // If there is no call operator and the message is "[]" then this is
13533 // an aref expression, and we can transform it into an aset
13534 // expression.
13535 if (PM_NODE_FLAG_P(call, PM_CALL_NODE_FLAGS_INDEX)) {
13536 return (pm_node_t *) pm_index_target_node_create(parser, call);
13537 }
13538 }
13539 /* fallthrough */
13540 default:
13541 // In this case we have a node that we don't know how to convert
13542 // into a target. We need to treat it as an error. For now, we'll
13543 // mark it as an error and just skip right past it.
13544 pm_parser_err_node(parser, target, PM_ERR_WRITE_TARGET_UNEXPECTED);
13545 return target;
13546 }
13547}
13548
13553static pm_node_t *
13554parse_target_validate(pm_parser_t *parser, pm_node_t *target, bool multiple) {
13555 pm_node_t *result = parse_target(parser, target, multiple, false);
13556
13557 // Ensure that we have one of an =, an 'in' in for indexes, and a ')' in
13558 // parens after the targets.
13559 if (
13560 !match1(parser, PM_TOKEN_EQUAL) &&
13561 !(context_p(parser, PM_CONTEXT_FOR_INDEX) && match1(parser, PM_TOKEN_KEYWORD_IN)) &&
13562 !(context_p(parser, PM_CONTEXT_PARENS) && match1(parser, PM_TOKEN_PARENTHESIS_RIGHT))
13563 ) {
13564 pm_parser_err_node(parser, result, PM_ERR_WRITE_TARGET_UNEXPECTED);
13565 }
13566
13567 return result;
13568}
13569
13574static pm_node_t *
13575parse_shareable_constant_write(pm_parser_t *parser, pm_node_t *write) {
13576 pm_shareable_constant_value_t shareable_constant = pm_parser_scope_shareable_constant_get(parser);
13577
13578 if (shareable_constant != PM_SCOPE_SHAREABLE_CONSTANT_NONE) {
13579 return (pm_node_t *) pm_shareable_constant_node_create(parser, write, shareable_constant);
13580 }
13581
13582 return write;
13583}
13584
/**
 * Build a write (assignment) node from a target node, the assignment operator
 * token, and the already-parsed value. In the visible branches the target is
 * either consumed to create a dedicated *_write_node (and then destroyed), or,
 * for call nodes, mutated in place so that the value becomes its last
 * argument and its name becomes the setter name.
 *
 * When the target cannot be written to, PM_ERR_WRITE_TARGET_UNEXPECTED is
 * reported on the operator token and the target is returned as-is.
 *
 * NOTE(review): several case labels (and possibly statements) are not visible
 * in this listing, so some of the blocks below have no visible label. Confirm
 * the node type each one handles against the complete file.
 */
13588static pm_node_t *
13589parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_node_t *value) {
13590 switch (PM_NODE_TYPE(target)) {
13591 case PM_MISSING_NODE:
// There is nothing to attach the value to, so it is destroyed here.
13592 pm_node_destroy(parser, value);
13593 return target;
13595 pm_class_variable_write_node_t *node = pm_class_variable_write_node_create(parser, (pm_class_variable_read_node_t *) target, operator, value);
13596 pm_node_destroy(parser, target);
13597 return (pm_node_t *) node;
13598 }
13599 case PM_CONSTANT_PATH_NODE: {
13600 pm_node_t *node = (pm_node_t *) pm_constant_path_write_node_create(parser, (pm_constant_path_node_t *) target, operator, value);
13601
13602 if (context_def_p(parser)) {
13603 pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_IN_METHOD);
13604 }
13605
13606 return parse_shareable_constant_write(parser, node);
13607 }
13608 case PM_CONSTANT_READ_NODE: {
13609 pm_node_t *node = (pm_node_t *) pm_constant_write_node_create(parser, (pm_constant_read_node_t *) target, operator, value);
13610
13611 if (context_def_p(parser)) {
13612 pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_IN_METHOD);
13613 }
13614
13615 pm_node_destroy(parser, target);
13616 return parse_shareable_constant_write(parser, node);
13617 }
13620 PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, target, PM_ERR_WRITE_TARGET_READONLY);
13621 /* fallthrough */
13623 pm_global_variable_write_node_t *node = pm_global_variable_write_node_create(parser, target, operator, value);
13624 pm_node_destroy(parser, target);
13625 return (pm_node_t *) node;
13626 }
13629
13630 pm_constant_id_t name = local_read->name;
13631 pm_location_t name_loc = target->location;
13632
13633 uint32_t depth = local_read->depth;
13634 pm_scope_t *scope = pm_parser_scope_find(parser, depth);
13635
13636 if (pm_token_is_numbered_parameter(target->location.start, target->location.end)) {
13637 pm_diagnostic_id_t diag_id = (scope->parameters & PM_SCOPE_PARAMETERS_NUMBERED_FOUND) ? PM_ERR_EXPRESSION_NOT_WRITABLE_NUMBERED : PM_ERR_PARAMETER_NUMBERED_RESERVED;
13638 PM_PARSER_ERR_FORMAT(parser, target->location.start, target->location.end, diag_id, target->location.start);
13639 parse_target_implicit_parameter(parser, target);
13640 }
13641
// name, depth and name_loc were copied out above, so the target can be
// destroyed before the write node is created.
13642 pm_locals_unread(&scope->locals, name);
13643 pm_node_destroy(parser, target);
13644
13645 return (pm_node_t *) pm_local_variable_write_node_create(parser, name, depth, value, &name_loc, operator);
13646 }
13648 pm_constant_id_t name = pm_parser_local_add_constant(parser, "it", 2);
13649 pm_node_t *node = (pm_node_t *) pm_local_variable_write_node_create(parser, name, 0, value, &target->location, operator);
13650
13651 parse_target_implicit_parameter(parser, target);
13652 pm_node_destroy(parser, target);
13653
13654 return node;
13655 }
13657 pm_node_t *write_node = (pm_node_t *) pm_instance_variable_write_node_create(parser, (pm_instance_variable_read_node_t *) target, operator, value);
13658 pm_node_destroy(parser, target);
13659 return write_node;
13660 }
13662 return (pm_node_t *) pm_multi_write_node_create(parser, (pm_multi_target_node_t *) target, operator, value);
13663 case PM_SPLAT_NODE: {
13664 pm_splat_node_t *splat = (pm_splat_node_t *) target;
13665
13666 if (splat->expression != NULL) {
13667 splat->expression = parse_write(parser, splat->expression, operator, value);
13668 }
13669
// The splat is wrapped in a new multi target node, and the result is a multi
// write node over that wrapper.
13670 pm_multi_target_node_t *multi_target = pm_multi_target_node_create(parser);
13671 pm_multi_target_node_targets_append(parser, multi_target, (pm_node_t *) splat);
13672
13673 return (pm_node_t *) pm_multi_write_node_create(parser, multi_target, operator, value);
13674 }
13675 case PM_CALL_NODE: {
13676 pm_call_node_t *call = (pm_call_node_t *) target;
13677
13678 // If we have no arguments to the call node and we need this to be a
13679 // target then this is either a method call or a local variable
13680 // write.
13681 if (
13682 (call->message_loc.start != NULL) &&
13683 (call->message_loc.end[-1] != '!') &&
13684 (call->message_loc.end[-1] != '?') &&
13685 (call->opening_loc.start == NULL) &&
13686 (call->arguments == NULL) &&
13687 (call->block == NULL)
13688 ) {
13689 if (call->receiver == NULL) {
13690 // When we get here, we have a local variable write, because it
13691 // was previously marked as a method call but now we have an =.
13692 // This looks like:
13693 //
13694 // foo = 1
13695 //
13696 // When it was parsed in the prefix position, foo was seen as a
13697 // method call with no receiver and no arguments. Now we have an
13698 // =, so we know it's a local variable write.
13699 const pm_location_t message = call->message_loc;
13700
13701 pm_parser_local_add_location(parser, message.start, message.end, 0);
13702 pm_node_destroy(parser, target);
13703
13704 pm_constant_id_t constant_id = pm_parser_constant_id_location(parser, message.start, message.end);
13705 target = (pm_node_t *) pm_local_variable_write_node_create(parser, constant_id, 0, value, &message, operator);
13706
13707 pm_refute_numbered_parameter(parser, message.start, message.end);
13708 return target;
13709 }
13710
13711 if (char_is_identifier_start(parser, call->message_loc.start)) {
13712 // When we get here, we have a method call, because it was
13713 // previously marked as a method call but now we have an =. This
13714 // looks like:
13715 //
13716 // foo.bar = 1
13717 //
13718 // When it was parsed in the prefix position, foo.bar was seen as a
13719 // method call with no arguments. Now we have an =, so we know it's
13720 // a method call with an argument. In this case we will create the
13721 // arguments node, parse the argument, and add it to the list.
13722 pm_arguments_node_t *arguments = pm_arguments_node_create(parser);
13723 call->arguments = arguments;
13724
13725 pm_arguments_node_arguments_append(arguments, value);
13726 call->base.location.end = arguments->base.location.end;
13727
13728 parse_write_name(parser, &call->name);
13729 pm_node_flag_set((pm_node_t *) call, PM_CALL_NODE_FLAGS_ATTRIBUTE_WRITE | pm_implicit_array_write_flags(value, PM_CALL_NODE_FLAGS_IMPLICIT_ARRAY));
13730
13731 return (pm_node_t *) call;
13732 }
13733 }
13734
13735 // If there is no call operator and the message is "[]" then this is
13736 // an aref expression, and we can transform it into an aset
13737 // expression.
13738 if (PM_NODE_FLAG_P(call, PM_CALL_NODE_FLAGS_INDEX)) {
13739 if (call->arguments == NULL) {
13740 call->arguments = pm_arguments_node_create(parser);
13741 }
13742
13743 pm_arguments_node_arguments_append(call->arguments, value);
13744 target->location.end = value->location.end;
13745
13746 // Replace the name with "[]=".
13747 call->name = pm_parser_constant_id_constant(parser, "[]=", 3);
13748
13749 // Ensure that the arguments for []= don't contain keywords
13750 pm_index_arguments_check(parser, call->arguments, call->block);
13751 pm_node_flag_set((pm_node_t *) call, PM_CALL_NODE_FLAGS_ATTRIBUTE_WRITE | pm_implicit_array_write_flags(value, PM_CALL_NODE_FLAGS_IMPLICIT_ARRAY));
13752
13753 return target;
13754 }
13755
13756 // If there are arguments on the call node, then it can't be a method
13757 // call ending with = or a local variable write, so it must be a
13758 // syntax error. In this case we'll fall through to our default
13759 // handling. We need to free the value that we parsed because there
13760 // is no way for us to attach it to the tree at this point.
13761 pm_node_destroy(parser, value);
13762 }
13763 /* fallthrough */
13764 default:
13765 // In this case we have a node that we don't know how to convert into a
13766 // target. We need to treat it as an error. For now, we'll mark it as an
13767 // error and just skip right past it.
13768 pm_parser_err_token(parser, operator, PM_ERR_WRITE_TARGET_UNEXPECTED);
13769 return target;
13770 }
13771}
13772
13779static pm_node_t *
13780parse_unwriteable_write(pm_parser_t *parser, pm_node_t *target, const pm_token_t *equals, pm_node_t *value) {
13781 switch (PM_NODE_TYPE(target)) {
13782 case PM_SOURCE_ENCODING_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_ENCODING); break;
13783 case PM_FALSE_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_FALSE); break;
13784 case PM_SOURCE_FILE_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_FILE); break;
13785 case PM_SOURCE_LINE_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_LINE); break;
13786 case PM_NIL_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_NIL); break;
13787 case PM_SELF_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_SELF); break;
13788 case PM_TRUE_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_TRUE); break;
13789 default: break;
13790 }
13791
13792 pm_constant_id_t name = pm_parser_local_add_location(parser, target->location.start, target->location.end, 1);
13793 pm_local_variable_write_node_t *result = pm_local_variable_write_node_create(parser, name, 0, value, &target->location, equals);
13794
13795 pm_node_destroy(parser, target);
13796 return (pm_node_t *) result;
13797}
13798
13809static pm_node_t *
13810parse_targets(pm_parser_t *parser, pm_node_t *first_target, pm_binding_power_t binding_power, uint16_t depth) {
13811 bool has_rest = PM_NODE_TYPE_P(first_target, PM_SPLAT_NODE);
13812
13813 pm_multi_target_node_t *result = pm_multi_target_node_create(parser);
13814 pm_multi_target_node_targets_append(parser, result, parse_target(parser, first_target, true, false));
13815
13816 while (accept1(parser, PM_TOKEN_COMMA)) {
13817 if (accept1(parser, PM_TOKEN_USTAR)) {
13818 // Here we have a splat operator. It can have a name or be
13819 // anonymous. It can be the final target or be in the middle if
13820 // there haven't been any others yet.
13821 if (has_rest) {
13822 pm_parser_err_previous(parser, PM_ERR_MULTI_ASSIGN_MULTI_SPLATS);
13823 }
13824
13825 pm_token_t star_operator = parser->previous;
13826 pm_node_t *name = NULL;
13827
13828 if (token_begins_expression_p(parser->current.type)) {
13829 name = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
13830 name = parse_target(parser, name, true, true);
13831 }
13832
13833 pm_node_t *splat = (pm_node_t *) pm_splat_node_create(parser, &star_operator, name);
13834 pm_multi_target_node_targets_append(parser, result, splat);
13835 has_rest = true;
13836 } else if (match1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
13837 context_push(parser, PM_CONTEXT_MULTI_TARGET);
13838 pm_node_t *target = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_COMMA, (uint16_t) (depth + 1));
13839 target = parse_target(parser, target, true, false);
13840
13841 pm_multi_target_node_targets_append(parser, result, target);
13842 context_pop(parser);
13843 } else if (token_begins_expression_p(parser->current.type)) {
13844 pm_node_t *target = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_COMMA, (uint16_t) (depth + 1));
13845 target = parse_target(parser, target, true, false);
13846
13847 pm_multi_target_node_targets_append(parser, result, target);
13848 } else if (!match1(parser, PM_TOKEN_EOF)) {
13849 // If we get here, then we have a trailing , in a multi target node.
13850 // We'll add an implicit rest node to represent this.
13851 pm_node_t *rest = (pm_node_t *) pm_implicit_rest_node_create(parser, &parser->previous);
13852 pm_multi_target_node_targets_append(parser, result, rest);
13853 break;
13854 }
13855 }
13856
13857 return (pm_node_t *) result;
13858}
13859
13864static pm_node_t *
13865parse_targets_validate(pm_parser_t *parser, pm_node_t *first_target, pm_binding_power_t binding_power, uint16_t depth) {
13866 pm_node_t *result = parse_targets(parser, first_target, binding_power, depth);
13867 accept1(parser, PM_TOKEN_NEWLINE);
13868
13869 // Ensure that we have either an = or a ) after the targets.
13870 if (!match2(parser, PM_TOKEN_EQUAL, PM_TOKEN_PARENTHESIS_RIGHT)) {
13871 pm_parser_err_node(parser, result, PM_ERR_WRITE_TARGET_UNEXPECTED);
13872 }
13873
13874 return result;
13875}
13876
/**
 * Parse a list of statements in the given context.
 *
 * Returns NULL when, after skipping any leading semicolons and newlines, the
 * current token already terminates `context`. Otherwise `context` is pushed
 * while statements are parsed one at a time and popped afterwards, and the
 * resulting statements node is returned. Parsing stops early if the parser is
 * recovering from a syntax error.
 *
 * Before returning, pm_void_statements_check is run on the statements with
 * `last_value`, which starts as true and is cleared by the switch on
 * `context` near the end of the function.
 *
 * NOTE(review): the case labels of that final switch are not visible in this
 * listing, so which contexts clear `last_value` cannot be read from here.
 */
13880static pm_statements_node_t *
13881parse_statements(pm_parser_t *parser, pm_context_t context, uint16_t depth) {
13882 // First, skip past any optional terminators that might be at the beginning
13883 // of the statements.
13884 while (accept2(parser, PM_TOKEN_SEMICOLON, PM_TOKEN_NEWLINE));
13885
13886 // If we have a terminator, then we can just return NULL.
13887 if (context_terminator(context, &parser->current)) return NULL;
13888
13889 pm_statements_node_t *statements = pm_statements_node_create(parser);
13890
13891 // At this point we know we have at least one statement, and that it
13892 // immediately follows the current token.
13893 context_push(parser, context);
13894
13895 while (true) {
13896 pm_node_t *node = parse_expression(parser, PM_BINDING_POWER_STATEMENT, true, false, PM_ERR_CANNOT_PARSE_EXPRESSION, (uint16_t) (depth + 1));
13897 pm_statements_node_body_append(parser, statements, node, true);
13898
13899 // If we're recovering from a syntax error, then we need to stop parsing
13900 // the statements now.
13901 if (parser->recovering) {
13902 // If this is the level of context where the recovery has happened,
13903 // then we can mark the parser as done recovering.
13904 if (context_terminator(context, &parser->current)) parser->recovering = false;
13905 break;
13906 }
13907
13908 // If we have a terminator, then we will parse all consecutive
13909 // terminators and then continue parsing the statements list.
13910 if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
13911 // If we have a terminator, then we will continue parsing the
13912 // statements list.
13913 while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
13914 if (context_terminator(context, &parser->current)) break;
13915
13916 // Now we can continue parsing the list of statements.
13917 continue;
13918 }
13919
13920 // At this point we have a list of statements that are not terminated by
13921 // a newline or semicolon. At this point we need to check if we're at
13922 // the end of the statements list. If we are, then we should break out
13923 // of the loop.
13924 if (context_terminator(context, &parser->current)) break;
13925
13926 // At this point, we have a syntax error, because the statement was not
13927 // terminated by a newline or semicolon, and we're not at the end of the
13928 // statements list. Ideally we should scan forward to determine if we
13929 // should insert a missing terminator or break out of parsing the
13930 // statements list at this point.
13931 //
13932 // We don't have that yet, so instead we'll do a more naive approach. If
13933 // we were unable to parse an expression, then we will skip past this
13934 // token and continue parsing the statements list. Otherwise we'll add
13935 // an error and continue parsing the statements list.
13936 if (PM_NODE_TYPE_P(node, PM_MISSING_NODE)) {
13937 parser_lex(parser);
13938
13939 while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
13940 if (context_terminator(context, &parser->current)) break;
13941 } else if (!accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_EOF)) {
13942 // This is an inlined version of accept1 because the error that we
13943 // want to add has varargs. If this happens again, we should
13944 // probably extract a helper function.
13945 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(parser->current.type));
13946 parser->previous.start = parser->previous.end;
13947 parser->previous.type = PM_TOKEN_MISSING;
13948 }
13949 }
13950
13951 context_pop(parser);
// last_value is handed to pm_void_statements_check below. It stays true
// unless the switch clears it.
13952 bool last_value = true;
13953 switch (context) {
// NOTE(review): the case label(s) for the two statements below are missing
// from this listing.
13956 last_value = false;
13957 break;
13958 default:
13959 break;
13960 }
13961 pm_void_statements_check(parser, statements, last_value);
13962
13963 return statements;
13964}
13965
13970static void
13971pm_hash_key_static_literals_add(pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *node) {
13972 const pm_node_t *duplicated = pm_static_literals_add(&parser->newline_list, parser->start_line, literals, node, true);
13973
13974 if (duplicated != NULL) {
13975 pm_buffer_t buffer = { 0 };
13976 pm_static_literal_inspect(&buffer, &parser->newline_list, parser->start_line, parser->encoding->name, duplicated);
13977
13978 pm_diagnostic_list_append_format(
13979 &parser->warning_list,
13980 duplicated->location.start,
13981 duplicated->location.end,
13982 PM_WARN_DUPLICATED_HASH_KEY,
13983 (int) pm_buffer_length(&buffer),
13984 pm_buffer_value(&buffer),
13985 pm_newline_list_line_column(&parser->newline_list, node->location.start, parser->start_line).line
13986 );
13987
13988 pm_buffer_free(&buffer);
13989 }
13990}
13991
13996static void
13997pm_when_clause_static_literals_add(pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *node) {
13998 pm_node_t *previous;
13999
14000 if ((previous = pm_static_literals_add(&parser->newline_list, parser->start_line, literals, node, false)) != NULL) {
14001 pm_diagnostic_list_append_format(
14002 &parser->warning_list,
14003 node->location.start,
14004 node->location.end,
14005 PM_WARN_DUPLICATED_WHEN_CLAUSE,
14006 pm_newline_list_line_column(&parser->newline_list, node->location.start, parser->start_line).line,
14007 pm_newline_list_line_column(&parser->newline_list, previous->location.start, parser->start_line).line
14008 );
14009 }
14010}
14011
14015static bool
14016parse_assocs(pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *node, uint16_t depth) {
14018 bool contains_keyword_splat = false;
14019
14020 while (true) {
14021 pm_node_t *element;
14022
14023 switch (parser->current.type) {
14024 case PM_TOKEN_USTAR_STAR: {
14025 parser_lex(parser);
14026 pm_token_t operator = parser->previous;
14027 pm_node_t *value = NULL;
14028
14029 if (match1(parser, PM_TOKEN_BRACE_LEFT)) {
14030 // If we're about to parse a nested hash that is being
14031 // pushed into this hash directly with **, then we want the
14032 // inner hash to share the static literals with the outer
14033 // hash.
14034 parser->current_hash_keys = literals;
14035 value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT_HASH, (uint16_t) (depth + 1));
14036 } else if (token_begins_expression_p(parser->current.type)) {
14037 value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT_HASH, (uint16_t) (depth + 1));
14038 } else {
14039 pm_parser_scope_forwarding_keywords_check(parser, &operator);
14040 }
14041
14042 element = (pm_node_t *) pm_assoc_splat_node_create(parser, value, &operator);
14043 contains_keyword_splat = true;
14044 break;
14045 }
14046 case PM_TOKEN_LABEL: {
14047 pm_token_t label = parser->current;
14048 parser_lex(parser);
14049
14050 pm_node_t *key = (pm_node_t *) pm_symbol_node_label_create(parser, &label);
14051 pm_hash_key_static_literals_add(parser, literals, key);
14052
14053 pm_token_t operator = not_provided(parser);
14054 pm_node_t *value = NULL;
14055
14056 if (token_begins_expression_p(parser->current.type)) {
14057 value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_HASH_EXPRESSION_AFTER_LABEL, (uint16_t) (depth + 1));
14058 } else {
14059 if (parser->encoding->isupper_char(label.start, (label.end - 1) - label.start)) {
14060 pm_token_t constant = { .type = PM_TOKEN_CONSTANT, .start = label.start, .end = label.end - 1 };
14061 value = (pm_node_t *) pm_constant_read_node_create(parser, &constant);
14062 } else {
14063 int depth = -1;
14064 pm_token_t identifier = { .type = PM_TOKEN_IDENTIFIER, .start = label.start, .end = label.end - 1 };
14065
14066 if (identifier.end[-1] == '!' || identifier.end[-1] == '?') {
14067 PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, identifier, PM_ERR_INVALID_LOCAL_VARIABLE_READ);
14068 } else {
14069 depth = pm_parser_local_depth(parser, &identifier);
14070 }
14071
14072 if (depth == -1) {
14073 value = (pm_node_t *) pm_call_node_variable_call_create(parser, &identifier);
14074 } else {
14075 value = (pm_node_t *) pm_local_variable_read_node_create(parser, &identifier, (uint32_t) depth);
14076 }
14077 }
14078
14079 value->location.end++;
14080 value = (pm_node_t *) pm_implicit_node_create(parser, value);
14081 }
14082
14083 element = (pm_node_t *) pm_assoc_node_create(parser, key, &operator, value);
14084 break;
14085 }
14086 default: {
14087 pm_node_t *key = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, true, PM_ERR_HASH_KEY, (uint16_t) (depth + 1));
14088
14089 // Hash keys that are strings are automatically frozen. We will
14090 // mark that here.
14091 if (PM_NODE_TYPE_P(key, PM_STRING_NODE)) {
14092 pm_node_flag_set(key, PM_STRING_FLAGS_FROZEN | PM_NODE_FLAG_STATIC_LITERAL);
14093 }
14094
14095 pm_hash_key_static_literals_add(parser, literals, key);
14096
14097 pm_token_t operator;
14098 if (pm_symbol_node_label_p(key)) {
14099 operator = not_provided(parser);
14100 } else {
14101 expect1(parser, PM_TOKEN_EQUAL_GREATER, PM_ERR_HASH_ROCKET);
14102 operator = parser->previous;
14103 }
14104
14105 pm_node_t *value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_HASH_VALUE, (uint16_t) (depth + 1));
14106 element = (pm_node_t *) pm_assoc_node_create(parser, key, &operator, value);
14107 break;
14108 }
14109 }
14110
14111 if (PM_NODE_TYPE_P(node, PM_HASH_NODE)) {
14112 pm_hash_node_elements_append((pm_hash_node_t *) node, element);
14113 } else {
14114 pm_keyword_hash_node_elements_append((pm_keyword_hash_node_t *) node, element);
14115 }
14116
14117 // If there's no comma after the element, then we're done.
14118 if (!accept1(parser, PM_TOKEN_COMMA)) break;
14119
14120 // If the next element starts with a label or a **, then we know we have
14121 // another element in the hash, so we'll continue parsing.
14122 if (match2(parser, PM_TOKEN_USTAR_STAR, PM_TOKEN_LABEL)) continue;
14123
14124 // Otherwise we need to check if the subsequent token begins an expression.
14125 // If it does, then we'll continue parsing.
14126 if (token_begins_expression_p(parser->current.type)) continue;
14127
14128 // Otherwise by default we will exit out of this loop.
14129 break;
14130 }
14131
14132 return contains_keyword_splat;
14133}
14134
14138static inline void
14139parse_arguments_append(pm_parser_t *parser, pm_arguments_t *arguments, pm_node_t *argument) {
14140 if (arguments->arguments == NULL) {
14141 arguments->arguments = pm_arguments_node_create(parser);
14142 }
14143
14144 pm_arguments_node_arguments_append(arguments->arguments, argument);
14145}
14146
14150static void
14151parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_forwarding, pm_token_type_t terminator, uint16_t depth) {
14152 pm_binding_power_t binding_power = pm_binding_powers[parser->current.type].left;
14153
14154 // First we need to check if the next token is one that could be the start
14155 // of an argument. If it's not, then we can just return.
14156 if (
14157 match2(parser, terminator, PM_TOKEN_EOF) ||
14158 (binding_power != PM_BINDING_POWER_UNSET && binding_power < PM_BINDING_POWER_RANGE) ||
14159 context_terminator(parser->current_context->context, &parser->current)
14160 ) {
14161 return;
14162 }
14163
14164 bool parsed_first_argument = false;
14165 bool parsed_bare_hash = false;
14166 bool parsed_block_argument = false;
14167 bool parsed_forwarding_arguments = false;
14168
14169 while (!match1(parser, PM_TOKEN_EOF)) {
14170 if (parsed_forwarding_arguments) {
14171 pm_parser_err_current(parser, PM_ERR_ARGUMENT_AFTER_FORWARDING_ELLIPSES);
14172 }
14173
14174 pm_node_t *argument = NULL;
14175
14176 switch (parser->current.type) {
14178 case PM_TOKEN_LABEL: {
14179 if (parsed_bare_hash) {
14180 pm_parser_err_current(parser, PM_ERR_ARGUMENT_BARE_HASH);
14181 }
14182
14183 pm_keyword_hash_node_t *hash = pm_keyword_hash_node_create(parser);
14184 argument = (pm_node_t *) hash;
14185
14186 pm_static_literals_t hash_keys = { 0 };
14187 bool contains_keyword_splat = parse_assocs(parser, &hash_keys, (pm_node_t *) hash, (uint16_t) (depth + 1));
14188
14189 parse_arguments_append(parser, arguments, argument);
14190
14192 if (contains_keyword_splat) flags |= PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORD_SPLAT;
14193 pm_node_flag_set((pm_node_t *) arguments->arguments, flags);
14194
14195 pm_static_literals_free(&hash_keys);
14196 parsed_bare_hash = true;
14197
14198 break;
14199 }
14200 case PM_TOKEN_UAMPERSAND: {
14201 parser_lex(parser);
14202 pm_token_t operator = parser->previous;
14203 pm_node_t *expression = NULL;
14204
14205 if (token_begins_expression_p(parser->current.type)) {
14206 expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_EXPECT_ARGUMENT, (uint16_t) (depth + 1));
14207 } else {
14208 pm_parser_scope_forwarding_block_check(parser, &operator);
14209 }
14210
14211 argument = (pm_node_t *) pm_block_argument_node_create(parser, &operator, expression);
14212 if (parsed_block_argument) {
14213 parse_arguments_append(parser, arguments, argument);
14214 } else {
14215 arguments->block = argument;
14216 }
14217
14218 if (match1(parser, PM_TOKEN_COMMA)) {
14219 pm_parser_err_current(parser, PM_ERR_ARGUMENT_AFTER_BLOCK);
14220 }
14221
14222 parsed_block_argument = true;
14223 break;
14224 }
14225 case PM_TOKEN_USTAR: {
14226 parser_lex(parser);
14227 pm_token_t operator = parser->previous;
14228
14230 pm_parser_scope_forwarding_positionals_check(parser, &operator);
14231 argument = (pm_node_t *) pm_splat_node_create(parser, &operator, NULL);
14232 if (parsed_bare_hash) {
14233 pm_parser_err_previous(parser, PM_ERR_ARGUMENT_SPLAT_AFTER_ASSOC_SPLAT);
14234 }
14235 } else {
14236 pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT, (uint16_t) (depth + 1));
14237
14238 if (parsed_bare_hash) {
14239 pm_parser_err(parser, operator.start, expression->location.end, PM_ERR_ARGUMENT_SPLAT_AFTER_ASSOC_SPLAT);
14240 }
14241
14242 argument = (pm_node_t *) pm_splat_node_create(parser, &operator, expression);
14243 }
14244
14245 parse_arguments_append(parser, arguments, argument);
14246 break;
14247 }
14248 case PM_TOKEN_UDOT_DOT_DOT: {
14249 if (accepts_forwarding) {
14250 parser_lex(parser);
14251
14252 if (token_begins_expression_p(parser->current.type)) {
14253 // If the token begins an expression then this ... was
14254 // not actually argument forwarding but was instead a
14255 // range.
14256 pm_token_t operator = parser->previous;
14257 pm_node_t *right = parse_expression(parser, PM_BINDING_POWER_RANGE, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
14258
14259 // If we parse a range, we need to validate that we
14260 // didn't accidentally violate the nonassoc rules of the
14261 // ... operator.
14262 if (PM_NODE_TYPE_P(right, PM_RANGE_NODE)) {
14263 pm_range_node_t *range = (pm_range_node_t *) right;
14264 pm_parser_err(parser, range->operator_loc.start, range->operator_loc.end, PM_ERR_UNEXPECTED_RANGE_OPERATOR);
14265 }
14266
14267 argument = (pm_node_t *) pm_range_node_create(parser, NULL, &operator, right);
14268 } else {
14269 pm_parser_scope_forwarding_all_check(parser, &parser->previous);
14270 if (parsed_first_argument && terminator == PM_TOKEN_EOF) {
14271 pm_parser_err_previous(parser, PM_ERR_ARGUMENT_FORWARDING_UNBOUND);
14272 }
14273
14274 argument = (pm_node_t *) pm_forwarding_arguments_node_create(parser, &parser->previous);
14275 parse_arguments_append(parser, arguments, argument);
14276 pm_node_flag_set((pm_node_t *) arguments->arguments, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_FORWARDING);
14277 arguments->has_forwarding = true;
14278 parsed_forwarding_arguments = true;
14279 break;
14280 }
14281 }
14282 }
14283 /* fallthrough */
14284 default: {
14285 if (argument == NULL) {
14286 argument = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, !parsed_first_argument, true, PM_ERR_EXPECT_ARGUMENT, (uint16_t) (depth + 1));
14287 }
14288
14289 bool contains_keywords = false;
14290 bool contains_keyword_splat = false;
14291
14292 if (pm_symbol_node_label_p(argument) || accept1(parser, PM_TOKEN_EQUAL_GREATER)) {
14293 if (parsed_bare_hash) {
14294 pm_parser_err_previous(parser, PM_ERR_ARGUMENT_BARE_HASH);
14295 }
14296
14297 pm_token_t operator;
14298 if (parser->previous.type == PM_TOKEN_EQUAL_GREATER) {
14299 operator = parser->previous;
14300 } else {
14301 operator = not_provided(parser);
14302 }
14303
14304 pm_keyword_hash_node_t *bare_hash = pm_keyword_hash_node_create(parser);
14305 contains_keywords = true;
14306
14307 // Create the set of static literals for this hash.
14308 pm_static_literals_t hash_keys = { 0 };
14309 pm_hash_key_static_literals_add(parser, &hash_keys, argument);
14310
14311 // Finish parsing the one we are part way through.
14312 pm_node_t *value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_HASH_VALUE, (uint16_t) (depth + 1));
14313 argument = (pm_node_t *) pm_assoc_node_create(parser, argument, &operator, value);
14314
14315 pm_keyword_hash_node_elements_append(bare_hash, argument);
14316 argument = (pm_node_t *) bare_hash;
14317
14318 // Then parse more if we have a comma
14319 if (accept1(parser, PM_TOKEN_COMMA) && (
14320 token_begins_expression_p(parser->current.type) ||
14321 match2(parser, PM_TOKEN_USTAR_STAR, PM_TOKEN_LABEL)
14322 )) {
14323 contains_keyword_splat = parse_assocs(parser, &hash_keys, (pm_node_t *) bare_hash, (uint16_t) (depth + 1));
14324 }
14325
14326 pm_static_literals_free(&hash_keys);
14327 parsed_bare_hash = true;
14328 }
14329
14330 parse_arguments_append(parser, arguments, argument);
14331
14332 pm_node_flags_t flags = 0;
14333 if (contains_keywords) flags |= PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS;
14334 if (contains_keyword_splat) flags |= PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORD_SPLAT;
14335 pm_node_flag_set((pm_node_t *) arguments->arguments, flags);
14336
14337 break;
14338 }
14339 }
14340
14341 parsed_first_argument = true;
14342
14343 // If parsing the argument failed, we need to stop parsing arguments.
14344 if (PM_NODE_TYPE_P(argument, PM_MISSING_NODE) || parser->recovering) break;
14345
14346 // If the terminator of these arguments is not EOF, then we have a
14347 // specific token we're looking for. In that case we can accept a
14348 // newline here because it is not functioning as a statement terminator.
14349 bool accepted_newline = false;
14350 if (terminator != PM_TOKEN_EOF) {
14351 accepted_newline = accept1(parser, PM_TOKEN_NEWLINE);
14352 }
14353
14354 if (parser->previous.type == PM_TOKEN_COMMA && parsed_bare_hash) {
14355 // If we previously were on a comma and we just parsed a bare hash,
14356 // then we want to continue parsing arguments. This is because the
14357 // comma was grabbed up by the hash parser.
14358 } else if (accept1(parser, PM_TOKEN_COMMA)) {
14359 // If there was a comma, then we need to check if we also accepted a
14360 // newline. If we did, then this is a syntax error.
14361 if (accepted_newline) {
14362 pm_parser_err_previous(parser, PM_ERR_INVALID_COMMA);
14363 }
14364 } else {
14365 // If there is no comma at the end of the argument list then we're
14366 // done parsing arguments and can break out of this loop.
14367 break;
14368 }
14369
14370 // If we hit the terminator, then that means we have a trailing comma so
14371 // we can accept that output as well.
14372 if (match1(parser, terminator)) break;
14373 }
14374}
14375
14387parse_required_destructured_parameter(pm_parser_t *parser) {
14388 expect1(parser, PM_TOKEN_PARENTHESIS_LEFT, PM_ERR_EXPECT_LPAREN_REQ_PARAMETER);
14389
14390 pm_multi_target_node_t *node = pm_multi_target_node_create(parser);
14391 pm_multi_target_node_opening_set(node, &parser->previous);
14392
14393 do {
14394 pm_node_t *param;
14395
14396 // If we get here then we have a trailing comma, which isn't allowed in
14397 // the grammar. In other places, multi targets _do_ allow trailing
14398 // commas, so here we'll assume this is a mistake of the user not
14399 // knowing it's not allowed here.
14400 if (node->lefts.size > 0 && match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
14401 param = (pm_node_t *) pm_implicit_rest_node_create(parser, &parser->previous);
14402 pm_multi_target_node_targets_append(parser, node, param);
14403 pm_parser_err_current(parser, PM_ERR_PARAMETER_WILD_LOOSE_COMMA);
14404 break;
14405 }
14406
14407 if (match1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
14408 param = (pm_node_t *) parse_required_destructured_parameter(parser);
14409 } else if (accept1(parser, PM_TOKEN_USTAR)) {
14410 pm_token_t star = parser->previous;
14411 pm_node_t *value = NULL;
14412
14413 if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
14414 pm_token_t name = parser->previous;
14415 value = (pm_node_t *) pm_required_parameter_node_create(parser, &name);
14416 if (pm_parser_parameter_name_check(parser, &name)) {
14417 pm_node_flag_set_repeated_parameter(value);
14418 }
14419 pm_parser_local_add_token(parser, &name, 1);
14420 }
14421
14422 param = (pm_node_t *) pm_splat_node_create(parser, &star, value);
14423 } else {
14424 expect1(parser, PM_TOKEN_IDENTIFIER, PM_ERR_EXPECT_IDENT_REQ_PARAMETER);
14425 pm_token_t name = parser->previous;
14426
14427 param = (pm_node_t *) pm_required_parameter_node_create(parser, &name);
14428 if (pm_parser_parameter_name_check(parser, &name)) {
14429 pm_node_flag_set_repeated_parameter(param);
14430 }
14431 pm_parser_local_add_token(parser, &name, 1);
14432 }
14433
14434 pm_multi_target_node_targets_append(parser, node, param);
14435 } while (accept1(parser, PM_TOKEN_COMMA));
14436
14437 accept1(parser, PM_TOKEN_NEWLINE);
14438 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN_REQ_PARAMETER);
14439 pm_multi_target_node_closing_set(node, &parser->previous);
14440
14441 return node;
14442}
14443
/**
 * The states used to validate the order of parameters in a parameter list.
 *
 * The numeric order matters: parsing starts at PM_PARAMETERS_ORDER_NONE (the
 * largest value) and the state is only ever allowed to move towards smaller
 * values, so callers compare these values with `<` and `>`.
 */
typedef enum {
    PM_PARAMETERS_NO_CHANGE = 0, // Extra state for tokens that should not change the state
    PM_PARAMETERS_ORDER_NOTHING_AFTER = 1, // No further parameter may follow
    PM_PARAMETERS_ORDER_KEYWORDS_REST, // A keyword rest (`**`) parameter was seen
    PM_PARAMETERS_ORDER_KEYWORDS, // A keyword (label) parameter was seen
    PM_PARAMETERS_ORDER_REST,
    PM_PARAMETERS_ORDER_AFTER_OPTIONAL, // Past the optional parameters (rest or trailing required)
    PM_PARAMETERS_ORDER_OPTIONAL, // An optional (`=`) parameter was seen
    PM_PARAMETERS_ORDER_NAMED, // A plain named or destructured parameter was seen
    PM_PARAMETERS_ORDER_NONE, // Initial state: nothing has been parsed yet
} pm_parameters_order_t;
14459
14463static pm_parameters_order_t parameters_ordering[PM_TOKEN_MAXIMUM] = {
14464 [0] = PM_PARAMETERS_NO_CHANGE,
14465 [PM_TOKEN_UAMPERSAND] = PM_PARAMETERS_ORDER_NOTHING_AFTER,
14466 [PM_TOKEN_AMPERSAND] = PM_PARAMETERS_ORDER_NOTHING_AFTER,
14467 [PM_TOKEN_UDOT_DOT_DOT] = PM_PARAMETERS_ORDER_NOTHING_AFTER,
14468 [PM_TOKEN_IDENTIFIER] = PM_PARAMETERS_ORDER_NAMED,
14469 [PM_TOKEN_PARENTHESIS_LEFT] = PM_PARAMETERS_ORDER_NAMED,
14470 [PM_TOKEN_EQUAL] = PM_PARAMETERS_ORDER_OPTIONAL,
14471 [PM_TOKEN_LABEL] = PM_PARAMETERS_ORDER_KEYWORDS,
14472 [PM_TOKEN_USTAR] = PM_PARAMETERS_ORDER_AFTER_OPTIONAL,
14473 [PM_TOKEN_STAR] = PM_PARAMETERS_ORDER_AFTER_OPTIONAL,
14474 [PM_TOKEN_USTAR_STAR] = PM_PARAMETERS_ORDER_KEYWORDS_REST,
14475 [PM_TOKEN_STAR_STAR] = PM_PARAMETERS_ORDER_KEYWORDS_REST
14476};
14477
14485static bool
14486update_parameter_state(pm_parser_t *parser, pm_token_t *token, pm_parameters_order_t *current) {
14487 pm_parameters_order_t state = parameters_ordering[token->type];
14488 if (state == PM_PARAMETERS_NO_CHANGE) return true;
14489
14490 // If we see another ordered argument after a optional argument
14491 // we only continue parsing ordered arguments until we stop seeing ordered arguments.
14492 if (*current == PM_PARAMETERS_ORDER_OPTIONAL && state == PM_PARAMETERS_ORDER_NAMED) {
14493 *current = PM_PARAMETERS_ORDER_AFTER_OPTIONAL;
14494 return true;
14495 } else if (*current == PM_PARAMETERS_ORDER_AFTER_OPTIONAL && state == PM_PARAMETERS_ORDER_NAMED) {
14496 return true;
14497 }
14498
14499 if (token->type == PM_TOKEN_USTAR && *current == PM_PARAMETERS_ORDER_AFTER_OPTIONAL) {
14500 pm_parser_err_token(parser, token, PM_ERR_PARAMETER_STAR);
14501 return false;
14502 } else if (token->type == PM_TOKEN_UDOT_DOT_DOT && (*current >= PM_PARAMETERS_ORDER_KEYWORDS_REST && *current <= PM_PARAMETERS_ORDER_AFTER_OPTIONAL)) {
14503 pm_parser_err_token(parser, token, *current == PM_PARAMETERS_ORDER_AFTER_OPTIONAL ? PM_ERR_PARAMETER_FORWARDING_AFTER_REST : PM_ERR_PARAMETER_ORDER);
14504 return false;
14505 } else if (*current == PM_PARAMETERS_ORDER_NOTHING_AFTER || state > *current) {
14506 // We know what transition we failed on, so we can provide a better error here.
14507 pm_parser_err_token(parser, token, PM_ERR_PARAMETER_ORDER);
14508 return false;
14509 }
14510
14511 if (state < *current) *current = state;
14512 return true;
14513}
14514
14518static pm_parameters_node_t *
14519parse_parameters(
14520 pm_parser_t *parser,
14521 pm_binding_power_t binding_power,
14522 bool uses_parentheses,
14523 bool allows_trailing_comma,
14524 bool allows_forwarding_parameters,
14525 bool accepts_blocks_in_defaults,
14526 bool in_block,
14527 uint16_t depth
14528) {
14529 pm_do_loop_stack_push(parser, false);
14530
14531 pm_parameters_node_t *params = pm_parameters_node_create(parser);
14532 pm_parameters_order_t order = PM_PARAMETERS_ORDER_NONE;
14533
14534 while (true) {
14535 bool parsing = true;
14536
14537 switch (parser->current.type) {
14539 update_parameter_state(parser, &parser->current, &order);
14540 pm_node_t *param = (pm_node_t *) parse_required_destructured_parameter(parser);
14541
14542 if (order > PM_PARAMETERS_ORDER_AFTER_OPTIONAL) {
14543 pm_parameters_node_requireds_append(params, param);
14544 } else {
14545 pm_parameters_node_posts_append(params, param);
14546 }
14547 break;
14548 }
14550 case PM_TOKEN_AMPERSAND: {
14551 update_parameter_state(parser, &parser->current, &order);
14552 parser_lex(parser);
14553
14554 pm_token_t operator = parser->previous;
14555 pm_token_t name;
14556
14557 bool repeated = false;
14558 if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
14559 name = parser->previous;
14560 repeated = pm_parser_parameter_name_check(parser, &name);
14561 pm_parser_local_add_token(parser, &name, 1);
14562 } else {
14563 name = not_provided(parser);
14564 parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_FORWARDING_BLOCK;
14565 }
14566
14567 pm_block_parameter_node_t *param = pm_block_parameter_node_create(parser, &name, &operator);
14568 if (repeated) {
14569 pm_node_flag_set_repeated_parameter((pm_node_t *)param);
14570 }
14571 if (params->block == NULL) {
14572 pm_parameters_node_block_set(params, param);
14573 } else {
14574 pm_parser_err_node(parser, (pm_node_t *) param, PM_ERR_PARAMETER_BLOCK_MULTI);
14575 pm_parameters_node_posts_append(params, (pm_node_t *) param);
14576 }
14577
14578 break;
14579 }
14580 case PM_TOKEN_UDOT_DOT_DOT: {
14581 if (!allows_forwarding_parameters) {
14582 pm_parser_err_current(parser, PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES);
14583 }
14584
14585 bool succeeded = update_parameter_state(parser, &parser->current, &order);
14586 parser_lex(parser);
14587
14588 parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_FORWARDING_ALL;
14589 pm_forwarding_parameter_node_t *param = pm_forwarding_parameter_node_create(parser, &parser->previous);
14590
14591 if (params->keyword_rest != NULL) {
14592 // If we already have a keyword rest parameter, then we replace it with the
14593 // forwarding parameter and move the keyword rest parameter to the posts list.
14594 pm_node_t *keyword_rest = params->keyword_rest;
14595 pm_parameters_node_posts_append(params, keyword_rest);
14596 if (succeeded) pm_parser_err_previous(parser, PM_ERR_PARAMETER_UNEXPECTED_FWD);
14597 params->keyword_rest = NULL;
14598 }
14599
14600 pm_parameters_node_keyword_rest_set(params, (pm_node_t *) param);
14601 break;
14602 }
14605 case PM_TOKEN_CONSTANT:
14608 case PM_TOKEN_METHOD_NAME: {
14609 parser_lex(parser);
14610 switch (parser->previous.type) {
14611 case PM_TOKEN_CONSTANT:
14612 pm_parser_err_previous(parser, PM_ERR_ARGUMENT_FORMAL_CONSTANT);
14613 break;
14615 pm_parser_err_previous(parser, PM_ERR_ARGUMENT_FORMAL_IVAR);
14616 break;
14618 pm_parser_err_previous(parser, PM_ERR_ARGUMENT_FORMAL_GLOBAL);
14619 break;
14621 pm_parser_err_previous(parser, PM_ERR_ARGUMENT_FORMAL_CLASS);
14622 break;
14624 pm_parser_err_previous(parser, PM_ERR_PARAMETER_METHOD_NAME);
14625 break;
14626 default: break;
14627 }
14628
14629 if (parser->current.type == PM_TOKEN_EQUAL) {
14630 update_parameter_state(parser, &parser->current, &order);
14631 } else {
14632 update_parameter_state(parser, &parser->previous, &order);
14633 }
14634
14635 pm_token_t name = parser->previous;
14636 bool repeated = pm_parser_parameter_name_check(parser, &name);
14637 pm_parser_local_add_token(parser, &name, 1);
14638
14639 if (match1(parser, PM_TOKEN_EQUAL)) {
14640 pm_token_t operator = parser->current;
14641 context_push(parser, PM_CONTEXT_DEFAULT_PARAMS);
14642 parser_lex(parser);
14643
14644 pm_constant_id_t name_id = pm_parser_constant_id_token(parser, &name);
14645 uint32_t reads = parser->version == PM_OPTIONS_VERSION_CRUBY_3_3 ? pm_locals_reads(&parser->current_scope->locals, name_id) : 0;
14646
14647 if (accepts_blocks_in_defaults) pm_accepts_block_stack_push(parser, true);
14648 pm_node_t *value = parse_value_expression(parser, binding_power, false, false, PM_ERR_PARAMETER_NO_DEFAULT, (uint16_t) (depth + 1));
14649 if (accepts_blocks_in_defaults) pm_accepts_block_stack_pop(parser);
14650
14651 pm_optional_parameter_node_t *param = pm_optional_parameter_node_create(parser, &name, &operator, value);
14652
14653 if (repeated) {
14654 pm_node_flag_set_repeated_parameter((pm_node_t *) param);
14655 }
14656 pm_parameters_node_optionals_append(params, param);
14657
14658 // If the value of the parameter increased the number of
14659 // reads of that parameter, then we need to warn that we
14660 // have a circular definition.
14661 if ((parser->version == PM_OPTIONS_VERSION_CRUBY_3_3) && (pm_locals_reads(&parser->current_scope->locals, name_id) != reads)) {
14662 PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, name, PM_ERR_PARAMETER_CIRCULAR);
14663 }
14664
14665 context_pop(parser);
14666
14667 // If parsing the value of the parameter resulted in error recovery,
14668 // then we can put a missing node in its place and stop parsing the
14669 // parameters entirely now.
14670 if (parser->recovering) {
14671 parsing = false;
14672 break;
14673 }
14674 } else if (order > PM_PARAMETERS_ORDER_AFTER_OPTIONAL) {
14675 pm_required_parameter_node_t *param = pm_required_parameter_node_create(parser, &name);
14676 if (repeated) {
14677 pm_node_flag_set_repeated_parameter((pm_node_t *)param);
14678 }
14679 pm_parameters_node_requireds_append(params, (pm_node_t *) param);
14680 } else {
14681 pm_required_parameter_node_t *param = pm_required_parameter_node_create(parser, &name);
14682 if (repeated) {
14683 pm_node_flag_set_repeated_parameter((pm_node_t *)param);
14684 }
14685 pm_parameters_node_posts_append(params, (pm_node_t *) param);
14686 }
14687
14688 break;
14689 }
14690 case PM_TOKEN_LABEL: {
14691 if (!uses_parentheses && !in_block) parser->in_keyword_arg = true;
14692 update_parameter_state(parser, &parser->current, &order);
14693
14694 context_push(parser, PM_CONTEXT_DEFAULT_PARAMS);
14695 parser_lex(parser);
14696
14697 pm_token_t name = parser->previous;
14698 pm_token_t local = name;
14699 local.end -= 1;
14700
14701 if (parser->encoding_changed ? parser->encoding->isupper_char(local.start, local.end - local.start) : pm_encoding_utf_8_isupper_char(local.start, local.end - local.start)) {
14702 pm_parser_err(parser, local.start, local.end, PM_ERR_ARGUMENT_FORMAL_CONSTANT);
14703 } else if (local.end[-1] == '!' || local.end[-1] == '?') {
14704 PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, local, PM_ERR_INVALID_LOCAL_VARIABLE_WRITE);
14705 }
14706
14707 bool repeated = pm_parser_parameter_name_check(parser, &local);
14708 pm_parser_local_add_token(parser, &local, 1);
14709
14710 switch (parser->current.type) {
14711 case PM_TOKEN_COMMA:
14713 case PM_TOKEN_PIPE: {
14714 context_pop(parser);
14715
14716 pm_node_t *param = (pm_node_t *) pm_required_keyword_parameter_node_create(parser, &name);
14717 if (repeated) {
14718 pm_node_flag_set_repeated_parameter(param);
14719 }
14720
14721 pm_parameters_node_keywords_append(params, param);
14722 break;
14723 }
14724 case PM_TOKEN_SEMICOLON:
14725 case PM_TOKEN_NEWLINE: {
14726 context_pop(parser);
14727
14728 if (uses_parentheses) {
14729 parsing = false;
14730 break;
14731 }
14732
14733 pm_node_t *param = (pm_node_t *) pm_required_keyword_parameter_node_create(parser, &name);
14734 if (repeated) {
14735 pm_node_flag_set_repeated_parameter(param);
14736 }
14737
14738 pm_parameters_node_keywords_append(params, param);
14739 break;
14740 }
14741 default: {
14742 pm_node_t *param;
14743
14744 if (token_begins_expression_p(parser->current.type)) {
14745 pm_constant_id_t name_id = pm_parser_constant_id_token(parser, &local);
14746 uint32_t reads = parser->version == PM_OPTIONS_VERSION_CRUBY_3_3 ? pm_locals_reads(&parser->current_scope->locals, name_id) : 0;
14747
14748 if (accepts_blocks_in_defaults) pm_accepts_block_stack_push(parser, true);
14749 pm_node_t *value = parse_value_expression(parser, binding_power, false, false, PM_ERR_PARAMETER_NO_DEFAULT_KW, (uint16_t) (depth + 1));
14750 if (accepts_blocks_in_defaults) pm_accepts_block_stack_pop(parser);
14751
14752 if (parser->version == PM_OPTIONS_VERSION_CRUBY_3_3 && (pm_locals_reads(&parser->current_scope->locals, name_id) != reads)) {
14753 PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, local, PM_ERR_PARAMETER_CIRCULAR);
14754 }
14755
14756 param = (pm_node_t *) pm_optional_keyword_parameter_node_create(parser, &name, value);
14757 }
14758 else {
14759 param = (pm_node_t *) pm_required_keyword_parameter_node_create(parser, &name);
14760 }
14761
14762 if (repeated) {
14763 pm_node_flag_set_repeated_parameter(param);
14764 }
14765
14766 context_pop(parser);
14767 pm_parameters_node_keywords_append(params, param);
14768
14769 // If parsing the value of the parameter resulted in error recovery,
14770 // then we can put a missing node in its place and stop parsing the
14771 // parameters entirely now.
14772 if (parser->recovering) {
14773 parsing = false;
14774 break;
14775 }
14776 }
14777 }
14778
14779 parser->in_keyword_arg = false;
14780 break;
14781 }
14782 case PM_TOKEN_USTAR:
14783 case PM_TOKEN_STAR: {
14784 update_parameter_state(parser, &parser->current, &order);
14785 parser_lex(parser);
14786
14787 pm_token_t operator = parser->previous;
14788 pm_token_t name;
14789 bool repeated = false;
14790
14791 if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
14792 name = parser->previous;
14793 repeated = pm_parser_parameter_name_check(parser, &name);
14794 pm_parser_local_add_token(parser, &name, 1);
14795 } else {
14796 name = not_provided(parser);
14797 parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_FORWARDING_POSITIONALS;
14798 }
14799
14800 pm_node_t *param = (pm_node_t *) pm_rest_parameter_node_create(parser, &operator, &name);
14801 if (repeated) {
14802 pm_node_flag_set_repeated_parameter(param);
14803 }
14804
14805 if (params->rest == NULL) {
14806 pm_parameters_node_rest_set(params, param);
14807 } else {
14808 pm_parser_err_node(parser, param, PM_ERR_PARAMETER_SPLAT_MULTI);
14809 pm_parameters_node_posts_append(params, param);
14810 }
14811
14812 break;
14813 }
14814 case PM_TOKEN_STAR_STAR:
14815 case PM_TOKEN_USTAR_STAR: {
14816 pm_parameters_order_t previous_order = order;
14817 update_parameter_state(parser, &parser->current, &order);
14818 parser_lex(parser);
14819
14820 pm_token_t operator = parser->previous;
14821 pm_node_t *param;
14822
14823 if (accept1(parser, PM_TOKEN_KEYWORD_NIL)) {
14824 if (previous_order <= PM_PARAMETERS_ORDER_KEYWORDS) {
14825 pm_parser_err_previous(parser, PM_ERR_PARAMETER_UNEXPECTED_NO_KW);
14826 }
14827
14828 param = (pm_node_t *) pm_no_keywords_parameter_node_create(parser, &operator, &parser->previous);
14829 } else {
14830 pm_token_t name;
14831
14832 bool repeated = false;
14833 if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
14834 name = parser->previous;
14835 repeated = pm_parser_parameter_name_check(parser, &name);
14836 pm_parser_local_add_token(parser, &name, 1);
14837 } else {
14838 name = not_provided(parser);
14839 parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_FORWARDING_KEYWORDS;
14840 }
14841
14842 param = (pm_node_t *) pm_keyword_rest_parameter_node_create(parser, &operator, &name);
14843 if (repeated) {
14844 pm_node_flag_set_repeated_parameter(param);
14845 }
14846 }
14847
14848 if (params->keyword_rest == NULL) {
14849 pm_parameters_node_keyword_rest_set(params, param);
14850 } else {
14851 pm_parser_err_node(parser, param, PM_ERR_PARAMETER_ASSOC_SPLAT_MULTI);
14852 pm_parameters_node_posts_append(params, param);
14853 }
14854
14855 break;
14856 }
14857 default:
14858 if (parser->previous.type == PM_TOKEN_COMMA) {
14859 if (allows_trailing_comma && order >= PM_PARAMETERS_ORDER_NAMED) {
14860 // If we get here, then we have a trailing comma in a
14861 // block parameter list.
14862 pm_node_t *param = (pm_node_t *) pm_implicit_rest_node_create(parser, &parser->previous);
14863
14864 if (params->rest == NULL) {
14865 pm_parameters_node_rest_set(params, param);
14866 } else {
14867 pm_parser_err_node(parser, (pm_node_t *) param, PM_ERR_PARAMETER_SPLAT_MULTI);
14868 pm_parameters_node_posts_append(params, (pm_node_t *) param);
14869 }
14870 } else {
14871 pm_parser_err_previous(parser, PM_ERR_PARAMETER_WILD_LOOSE_COMMA);
14872 }
14873 }
14874
14875 parsing = false;
14876 break;
14877 }
14878
14879 // If we hit some kind of issue while parsing the parameter, this would
14880 // have been set to false. In that case, we need to break out of the
14881 // loop.
14882 if (!parsing) break;
14883
14884 bool accepted_newline = false;
14885 if (uses_parentheses) {
14886 accepted_newline = accept1(parser, PM_TOKEN_NEWLINE);
14887 }
14888
14889 if (accept1(parser, PM_TOKEN_COMMA)) {
14890 // If there was a comma, but we also accepted a newline, then this
14891 // is a syntax error.
14892 if (accepted_newline) {
14893 pm_parser_err_previous(parser, PM_ERR_INVALID_COMMA);
14894 }
14895 } else {
14896 // If there was no comma, then we're done parsing parameters.
14897 break;
14898 }
14899 }
14900
14901 pm_do_loop_stack_pop(parser);
14902
14903 // If we don't have any parameters, return `NULL` instead of an empty `ParametersNode`.
14904 if (params->base.location.start == params->base.location.end) {
14905 pm_node_destroy(parser, (pm_node_t *) params);
14906 return NULL;
14907 }
14908
14909 return params;
14910}
14911
14916static size_t
14917token_newline_index(const pm_parser_t *parser) {
14918 if (parser->heredoc_end == NULL) {
14919 // This is the common case. In this case we can look at the previously
14920 // recorded newline in the newline list and subtract from the current
14921 // offset.
14922 return parser->newline_list.size - 1;
14923 } else {
14924 // This is unlikely. This is the case that we have already parsed the
14925 // start of a heredoc, so we cannot rely on looking at the previous
14926 // offset of the newline list, and instead must go through the whole
14927 // process of a binary search for the line number.
14928 return (size_t) pm_newline_list_line(&parser->newline_list, parser->current.start, 0);
14929 }
14930}
14931
14936static int64_t
14937token_column(const pm_parser_t *parser, size_t newline_index, const pm_token_t *token, bool break_on_non_space) {
14938 const uint8_t *cursor = parser->start + parser->newline_list.offsets[newline_index];
14939 const uint8_t *end = token->start;
14940
14941 // Skip over the BOM if it is present.
14942 if (
14943 newline_index == 0 &&
14944 parser->start[0] == 0xef &&
14945 parser->start[1] == 0xbb &&
14946 parser->start[2] == 0xbf
14947 ) cursor += 3;
14948
14949 int64_t column = 0;
14950 for (; cursor < end; cursor++) {
14951 switch (*cursor) {
14952 case '\t':
14953 column = ((column / PM_TAB_WHITESPACE_SIZE) + 1) * PM_TAB_WHITESPACE_SIZE;
14954 break;
14955 case ' ':
14956 column++;
14957 break;
14958 default:
14959 column++;
14960 if (break_on_non_space) return -1;
14961 break;
14962 }
14963 }
14964
14965 return column;
14966}
14967
14972static void
14973parser_warn_indentation_mismatch(pm_parser_t *parser, size_t opening_newline_index, const pm_token_t *opening_token, bool if_after_else, bool allow_indent) {
14974 // If these warnings are disabled (unlikely), then we can just return.
14975 if (!parser->warn_mismatched_indentation) return;
14976
14977 // If the tokens are on the same line, we do not warn.
14978 size_t closing_newline_index = token_newline_index(parser);
14979 if (opening_newline_index == closing_newline_index) return;
14980
14981 // If the opening token has anything other than spaces or tabs before it,
14982 // then we do not warn. This is unless we are matching up an `if`/`end` pair
14983 // and the `if` immediately follows an `else` keyword.
14984 int64_t opening_column = token_column(parser, opening_newline_index, opening_token, !if_after_else);
14985 if (!if_after_else && (opening_column == -1)) return;
14986
14987 // Get a reference to the closing token off the current parser. This assumes
14988 // that the caller has placed this in the correct position.
14989 pm_token_t *closing_token = &parser->current;
14990
14991 // If the tokens are at the same indentation, we do not warn.
14992 int64_t closing_column = token_column(parser, closing_newline_index, closing_token, true);
14993 if ((closing_column == -1) || (opening_column == closing_column)) return;
14994
14995 // If the closing column is greater than the opening column and we are
14996 // allowing indentation, then we do not warn.
14997 if (allow_indent && (closing_column > opening_column)) return;
14998
14999 // Otherwise, add a warning.
15000 PM_PARSER_WARN_FORMAT(
15001 parser,
15002 closing_token->start,
15003 closing_token->end,
15004 PM_WARN_INDENTATION_MISMATCH,
15005 (int) (closing_token->end - closing_token->start),
15006 (const char *) closing_token->start,
15007 (int) (opening_token->end - opening_token->start),
15008 (const char *) opening_token->start,
15009 ((int32_t) opening_newline_index) + parser->start_line
15010 );
15011}
15012
/**
 * The kind of construct that a set of rescue clauses is attached to. Values
 * start at 1, so a zero-initialized value does not match any of them.
 */
typedef enum {
    PM_RESCUES_BEGIN = 1,
    PM_RESCUES_BLOCK,
    PM_RESCUES_CLASS,
    PM_RESCUES_DEF,
    PM_RESCUES_LAMBDA,
    PM_RESCUES_MODULE,
    PM_RESCUES_SCLASS
} pm_rescues_type_t;
15022
15027static inline void
15028parse_rescues(pm_parser_t *parser, size_t opening_newline_index, const pm_token_t *opening, pm_begin_node_t *parent_node, pm_rescues_type_t type, uint16_t depth) {
15029 pm_rescue_node_t *current = NULL;
15030
15031 while (match1(parser, PM_TOKEN_KEYWORD_RESCUE)) {
15032 if (opening != NULL) parser_warn_indentation_mismatch(parser, opening_newline_index, opening, false, false);
15033 parser_lex(parser);
15034
15035 pm_rescue_node_t *rescue = pm_rescue_node_create(parser, &parser->previous);
15036
15037 switch (parser->current.type) {
15039 // Here we have an immediate => after the rescue keyword, in which case
15040 // we're going to have an empty list of exceptions to rescue (which
15041 // implies StandardError).
15042 parser_lex(parser);
15043 pm_rescue_node_operator_set(rescue, &parser->previous);
15044
15045 pm_node_t *reference = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_RESCUE_VARIABLE, (uint16_t) (depth + 1));
15046 reference = parse_target(parser, reference, false, false);
15047
15048 pm_rescue_node_reference_set(rescue, reference);
15049 break;
15050 }
15051 case PM_TOKEN_NEWLINE:
15052 case PM_TOKEN_SEMICOLON:
15054 // Here we have a terminator for the rescue keyword, in which case we're
15055 // going to just continue on.
15056 break;
15057 default: {
15058 if (token_begins_expression_p(parser->current.type) || match1(parser, PM_TOKEN_USTAR)) {
15059 // Here we have something that could be an exception expression, so
15060 // we'll attempt to parse it here and any others delimited by commas.
15061
15062 do {
15063 pm_node_t *expression = parse_starred_expression(parser, PM_BINDING_POWER_DEFINED, false, PM_ERR_RESCUE_EXPRESSION, (uint16_t) (depth + 1));
15064 pm_rescue_node_exceptions_append(rescue, expression);
15065
15066 // If we hit a newline, then this is the end of the rescue expression. We
15067 // can continue on to parse the statements.
15068 if (match3(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_TOKEN_KEYWORD_THEN)) break;
15069
15070 // If we hit a `=>` then we're going to parse the exception variable. Once
15071 // we've done that, we'll break out of the loop and parse the statements.
15072 if (accept1(parser, PM_TOKEN_EQUAL_GREATER)) {
15073 pm_rescue_node_operator_set(rescue, &parser->previous);
15074
15075 pm_node_t *reference = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_RESCUE_VARIABLE, (uint16_t) (depth + 1));
15076 reference = parse_target(parser, reference, false, false);
15077
15078 pm_rescue_node_reference_set(rescue, reference);
15079 break;
15080 }
15081 } while (accept1(parser, PM_TOKEN_COMMA));
15082 }
15083 }
15084 }
15085
15086 if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
15087 accept1(parser, PM_TOKEN_KEYWORD_THEN);
15088 } else {
15089 expect1(parser, PM_TOKEN_KEYWORD_THEN, PM_ERR_RESCUE_TERM);
15090 }
15091
15093 pm_accepts_block_stack_push(parser, true);
15094 pm_context_t context;
15095
15096 switch (type) {
15097 case PM_RESCUES_BEGIN: context = PM_CONTEXT_BEGIN_RESCUE; break;
15098 case PM_RESCUES_BLOCK: context = PM_CONTEXT_BLOCK_RESCUE; break;
15099 case PM_RESCUES_CLASS: context = PM_CONTEXT_CLASS_RESCUE; break;
15100 case PM_RESCUES_DEF: context = PM_CONTEXT_DEF_RESCUE; break;
15101 case PM_RESCUES_LAMBDA: context = PM_CONTEXT_LAMBDA_RESCUE; break;
15102 case PM_RESCUES_MODULE: context = PM_CONTEXT_MODULE_RESCUE; break;
15103 case PM_RESCUES_SCLASS: context = PM_CONTEXT_SCLASS_RESCUE; break;
15104 default: assert(false && "unreachable"); context = PM_CONTEXT_BEGIN_RESCUE; break;
15105 }
15106
15107 pm_statements_node_t *statements = parse_statements(parser, context, (uint16_t) (depth + 1));
15108 if (statements != NULL) pm_rescue_node_statements_set(rescue, statements);
15109
15110 pm_accepts_block_stack_pop(parser);
15111 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
15112 }
15113
15114 if (current == NULL) {
15115 pm_begin_node_rescue_clause_set(parent_node, rescue);
15116 } else {
15117 pm_rescue_node_subsequent_set(current, rescue);
15118 }
15119
15120 current = rescue;
15121 }
15122
15123 // The end node locations on rescue nodes will not be set correctly
15124 // since we won't know the end until we've found all subsequent
15125 // clauses. This sets the end location on all rescues once we know it.
15126 if (current != NULL) {
15127 const uint8_t *end_to_set = current->base.location.end;
15128 pm_rescue_node_t *clause = parent_node->rescue_clause;
15129
15130 while (clause != NULL) {
15131 clause->base.location.end = end_to_set;
15132 clause = clause->subsequent;
15133 }
15134 }
15135
15136 pm_token_t else_keyword;
15137 if (match1(parser, PM_TOKEN_KEYWORD_ELSE)) {
15138 if (opening != NULL) parser_warn_indentation_mismatch(parser, opening_newline_index, opening, false, false);
15139 opening_newline_index = token_newline_index(parser);
15140
15141 else_keyword = parser->current;
15142 opening = &else_keyword;
15143
15144 parser_lex(parser);
15145 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
15146
15147 pm_statements_node_t *else_statements = NULL;
15148 if (!match2(parser, PM_TOKEN_KEYWORD_END, PM_TOKEN_KEYWORD_ENSURE)) {
15149 pm_accepts_block_stack_push(parser, true);
15150 pm_context_t context;
15151
15152 switch (type) {
15153 case PM_RESCUES_BEGIN: context = PM_CONTEXT_BEGIN_ELSE; break;
15154 case PM_RESCUES_BLOCK: context = PM_CONTEXT_BLOCK_ELSE; break;
15155 case PM_RESCUES_CLASS: context = PM_CONTEXT_CLASS_ELSE; break;
15156 case PM_RESCUES_DEF: context = PM_CONTEXT_DEF_ELSE; break;
15157 case PM_RESCUES_LAMBDA: context = PM_CONTEXT_LAMBDA_ELSE; break;
15158 case PM_RESCUES_MODULE: context = PM_CONTEXT_MODULE_ELSE; break;
15159 case PM_RESCUES_SCLASS: context = PM_CONTEXT_SCLASS_ELSE; break;
15160 default: assert(false && "unreachable"); context = PM_CONTEXT_BEGIN_ELSE; break;
15161 }
15162
15163 else_statements = parse_statements(parser, context, (uint16_t) (depth + 1));
15164 pm_accepts_block_stack_pop(parser);
15165
15166 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
15167 }
15168
15169 pm_else_node_t *else_clause = pm_else_node_create(parser, &else_keyword, else_statements, &parser->current);
15170 pm_begin_node_else_clause_set(parent_node, else_clause);
15171
15172 // If we don't have a `current` rescue node, then this is a dangling
15173 // else, and it's an error.
15174 if (current == NULL) pm_parser_err_node(parser, (pm_node_t *) else_clause, PM_ERR_BEGIN_LONELY_ELSE);
15175 }
15176
15177 if (match1(parser, PM_TOKEN_KEYWORD_ENSURE)) {
15178 if (opening != NULL) parser_warn_indentation_mismatch(parser, opening_newline_index, opening, false, false);
15179 pm_token_t ensure_keyword = parser->current;
15180
15181 parser_lex(parser);
15182 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
15183
15184 pm_statements_node_t *ensure_statements = NULL;
15185 if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
15186 pm_accepts_block_stack_push(parser, true);
15187 pm_context_t context;
15188
15189 switch (type) {
15190 case PM_RESCUES_BEGIN: context = PM_CONTEXT_BEGIN_ENSURE; break;
15191 case PM_RESCUES_BLOCK: context = PM_CONTEXT_BLOCK_ENSURE; break;
15192 case PM_RESCUES_CLASS: context = PM_CONTEXT_CLASS_ENSURE; break;
15193 case PM_RESCUES_DEF: context = PM_CONTEXT_DEF_ENSURE; break;
15194 case PM_RESCUES_LAMBDA: context = PM_CONTEXT_LAMBDA_ENSURE; break;
15195 case PM_RESCUES_MODULE: context = PM_CONTEXT_MODULE_ENSURE; break;
15196 case PM_RESCUES_SCLASS: context = PM_CONTEXT_SCLASS_ENSURE; break;
15197 default: assert(false && "unreachable"); context = PM_CONTEXT_BEGIN_RESCUE; break;
15198 }
15199
15200 ensure_statements = parse_statements(parser, context, (uint16_t) (depth + 1));
15201 pm_accepts_block_stack_pop(parser);
15202
15203 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
15204 }
15205
15206 pm_ensure_node_t *ensure_clause = pm_ensure_node_create(parser, &ensure_keyword, ensure_statements, &parser->current);
15207 pm_begin_node_ensure_clause_set(parent_node, ensure_clause);
15208 }
15209
15210 if (match1(parser, PM_TOKEN_KEYWORD_END)) {
15211 if (opening != NULL) parser_warn_indentation_mismatch(parser, opening_newline_index, opening, false, false);
15212 pm_begin_node_end_keyword_set(parent_node, &parser->current);
15213 } else {
15214 pm_token_t end_keyword = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
15215 pm_begin_node_end_keyword_set(parent_node, &end_keyword);
15216 }
15217}
15218
15223static pm_begin_node_t *
15224parse_rescues_implicit_begin(pm_parser_t *parser, size_t opening_newline_index, const pm_token_t *opening, const uint8_t *start, pm_statements_node_t *statements, pm_rescues_type_t type, uint16_t depth) {
15225 pm_token_t begin_keyword = not_provided(parser);
15226 pm_begin_node_t *node = pm_begin_node_create(parser, &begin_keyword, statements);
15227
15228 parse_rescues(parser, opening_newline_index, opening, node, type, (uint16_t) (depth + 1));
15229 node->base.location.start = start;
15230
15231 return node;
15232}
15233
15238parse_block_parameters(
15239 pm_parser_t *parser,
15240 bool allows_trailing_comma,
15241 const pm_token_t *opening,
15242 bool is_lambda_literal,
15243 bool accepts_blocks_in_defaults,
15244 uint16_t depth
15245) {
15246 pm_parameters_node_t *parameters = NULL;
15247 if (!match1(parser, PM_TOKEN_SEMICOLON)) {
15248 parameters = parse_parameters(
15249 parser,
15250 is_lambda_literal ? PM_BINDING_POWER_DEFINED : PM_BINDING_POWER_INDEX,
15251 false,
15252 allows_trailing_comma,
15253 false,
15254 accepts_blocks_in_defaults,
15255 true,
15256 (uint16_t) (depth + 1)
15257 );
15258 }
15259
15260 pm_block_parameters_node_t *block_parameters = pm_block_parameters_node_create(parser, parameters, opening);
15261 if ((opening->type != PM_TOKEN_NOT_PROVIDED)) {
15262 accept1(parser, PM_TOKEN_NEWLINE);
15263
15264 if (accept1(parser, PM_TOKEN_SEMICOLON)) {
15265 do {
15266 switch (parser->current.type) {
15267 case PM_TOKEN_CONSTANT:
15268 pm_parser_err_current(parser, PM_ERR_ARGUMENT_FORMAL_CONSTANT);
15269 parser_lex(parser);
15270 break;
15272 pm_parser_err_current(parser, PM_ERR_ARGUMENT_FORMAL_IVAR);
15273 parser_lex(parser);
15274 break;
15276 pm_parser_err_current(parser, PM_ERR_ARGUMENT_FORMAL_GLOBAL);
15277 parser_lex(parser);
15278 break;
15280 pm_parser_err_current(parser, PM_ERR_ARGUMENT_FORMAL_CLASS);
15281 parser_lex(parser);
15282 break;
15283 default:
15284 expect1(parser, PM_TOKEN_IDENTIFIER, PM_ERR_BLOCK_PARAM_LOCAL_VARIABLE);
15285 break;
15286 }
15287
15288 bool repeated = pm_parser_parameter_name_check(parser, &parser->previous);
15289 pm_parser_local_add_token(parser, &parser->previous, 1);
15290
15291 pm_block_local_variable_node_t *local = pm_block_local_variable_node_create(parser, &parser->previous);
15292 if (repeated) pm_node_flag_set_repeated_parameter((pm_node_t *) local);
15293
15294 pm_block_parameters_node_append_local(block_parameters, local);
15295 } while (accept1(parser, PM_TOKEN_COMMA));
15296 }
15297 }
15298
15299 return block_parameters;
15300}
15301
15306static bool
15307outer_scope_using_numbered_parameters_p(pm_parser_t *parser) {
15308 for (pm_scope_t *scope = parser->current_scope->previous; scope != NULL && !scope->closed; scope = scope->previous) {
15309 if (scope->parameters & PM_SCOPE_PARAMETERS_NUMBERED_FOUND) return true;
15310 }
15311
15312 return false;
15313}
15314
/**
 * The names of the numbered parameters, in ascending order: index 0 holds
 * "_1" and index 8 holds "_9".
 */
static const char * const pm_numbered_parameter_names[] = {
    [0] = "_1",
    [1] = "_2",
    [2] = "_3",
    [3] = "_4",
    [4] = "_5",
    [5] = "_6",
    [6] = "_7",
    [7] = "_8",
    [8] = "_9"
};
15323
15329static pm_node_t *
15330parse_blocklike_parameters(pm_parser_t *parser, pm_node_t *parameters, const pm_token_t *opening, const pm_token_t *closing) {
15331 pm_node_list_t *implicit_parameters = &parser->current_scope->implicit_parameters;
15332
15333 // If we have ordinary parameters, then we will return them as the set of
15334 // parameters.
15335 if (parameters != NULL) {
15336 // If we also have implicit parameters, then this is an error.
15337 if (implicit_parameters->size > 0) {
15338 pm_node_t *node = implicit_parameters->nodes[0];
15339
15341 pm_parser_err_node(parser, node, PM_ERR_NUMBERED_PARAMETER_ORDINARY);
15343 pm_parser_err_node(parser, node, PM_ERR_IT_NOT_ALLOWED_ORDINARY);
15344 } else {
15345 assert(false && "unreachable");
15346 }
15347 }
15348
15349 return parameters;
15350 }
15351
15352 // If we don't have any implicit parameters, then the set of parameters is
15353 // NULL.
15354 if (implicit_parameters->size == 0) {
15355 return NULL;
15356 }
15357
15358 // If we don't have ordinary parameters, then we now must validate our set
15359 // of implicit parameters. We can only have numbered parameters or it, but
15360 // they cannot be mixed.
15361 uint8_t numbered_parameter = 0;
15362 bool it_parameter = false;
15363
15364 for (size_t index = 0; index < implicit_parameters->size; index++) {
15365 pm_node_t *node = implicit_parameters->nodes[index];
15366
15368 if (it_parameter) {
15369 pm_parser_err_node(parser, node, PM_ERR_NUMBERED_PARAMETER_IT);
15370 } else if (outer_scope_using_numbered_parameters_p(parser)) {
15371 pm_parser_err_node(parser, node, PM_ERR_NUMBERED_PARAMETER_OUTER_BLOCK);
15372 } else if (parser->current_scope->parameters & PM_SCOPE_PARAMETERS_NUMBERED_INNER) {
15373 pm_parser_err_node(parser, node, PM_ERR_NUMBERED_PARAMETER_INNER_BLOCK);
15374 } else if (pm_token_is_numbered_parameter(node->location.start, node->location.end)) {
15375 numbered_parameter = MAX(numbered_parameter, (uint8_t) (node->location.start[1] - '0'));
15376 } else {
15377 assert(false && "unreachable");
15378 }
15380 if (numbered_parameter > 0) {
15381 pm_parser_err_node(parser, node, PM_ERR_IT_NOT_ALLOWED_NUMBERED);
15382 } else {
15383 it_parameter = true;
15384 }
15385 }
15386 }
15387
15388 if (numbered_parameter > 0) {
15389 // Go through the parent scopes and mark them as being disallowed from
15390 // using numbered parameters because this inner scope is using them.
15391 for (pm_scope_t *scope = parser->current_scope->previous; scope != NULL && !scope->closed; scope = scope->previous) {
15392 scope->parameters |= PM_SCOPE_PARAMETERS_NUMBERED_INNER;
15393 }
15394
15395 const pm_location_t location = { .start = opening->start, .end = closing->end };
15396 return (pm_node_t *) pm_numbered_parameters_node_create(parser, &location, numbered_parameter);
15397 }
15398
15399 if (it_parameter) {
15400 return (pm_node_t *) pm_it_parameters_node_create(parser, opening, closing);
15401 }
15402
15403 return NULL;
15404}
15405
15409static pm_block_node_t *
15410parse_block(pm_parser_t *parser, uint16_t depth) {
15411 pm_token_t opening = parser->previous;
15412 accept1(parser, PM_TOKEN_NEWLINE);
15413
15414 pm_accepts_block_stack_push(parser, true);
15415 pm_parser_scope_push(parser, false);
15416
15417 pm_block_parameters_node_t *block_parameters = NULL;
15418
15419 if (accept1(parser, PM_TOKEN_PIPE)) {
15420 pm_token_t block_parameters_opening = parser->previous;
15421 if (match1(parser, PM_TOKEN_PIPE)) {
15422 block_parameters = pm_block_parameters_node_create(parser, NULL, &block_parameters_opening);
15423 parser->command_start = true;
15424 parser_lex(parser);
15425 } else {
15426 block_parameters = parse_block_parameters(parser, true, &block_parameters_opening, false, true, (uint16_t) (depth + 1));
15427 accept1(parser, PM_TOKEN_NEWLINE);
15428 parser->command_start = true;
15429 expect1(parser, PM_TOKEN_PIPE, PM_ERR_BLOCK_PARAM_PIPE_TERM);
15430 }
15431
15432 pm_block_parameters_node_closing_set(block_parameters, &parser->previous);
15433 }
15434
15435 accept1(parser, PM_TOKEN_NEWLINE);
15436 pm_node_t *statements = NULL;
15437
15438 if (opening.type == PM_TOKEN_BRACE_LEFT) {
15439 if (!match1(parser, PM_TOKEN_BRACE_RIGHT)) {
15440 statements = (pm_node_t *) parse_statements(parser, PM_CONTEXT_BLOCK_BRACES, (uint16_t) (depth + 1));
15441 }
15442
15443 expect1(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_BLOCK_TERM_BRACE);
15444 } else {
15445 if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
15447 pm_accepts_block_stack_push(parser, true);
15448 statements = (pm_node_t *) parse_statements(parser, PM_CONTEXT_BLOCK_KEYWORDS, (uint16_t) (depth + 1));
15449 pm_accepts_block_stack_pop(parser);
15450 }
15451
15452 if (match2(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
15453 assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
15454 statements = (pm_node_t *) parse_rescues_implicit_begin(parser, 0, NULL, opening.start, (pm_statements_node_t *) statements, PM_RESCUES_BLOCK, (uint16_t) (depth + 1));
15455 }
15456 }
15457
15458 expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_BLOCK_TERM_END);
15459 }
15460
15461 pm_constant_id_list_t locals;
15462 pm_locals_order(parser, &parser->current_scope->locals, &locals, pm_parser_scope_toplevel_p(parser));
15463 pm_node_t *parameters = parse_blocklike_parameters(parser, (pm_node_t *) block_parameters, &opening, &parser->previous);
15464
15465 pm_parser_scope_pop(parser);
15466 pm_accepts_block_stack_pop(parser);
15467
15468 return pm_block_node_create(parser, &locals, &opening, parameters, statements, &parser->previous);
15469}
15470
15476static bool
15477parse_arguments_list(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_block, bool accepts_command_call, uint16_t depth) {
15478 bool found = false;
15479
15480 if (accept1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
15481 found |= true;
15482 arguments->opening_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
15483
15484 if (accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
15485 arguments->closing_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
15486 } else {
15487 pm_accepts_block_stack_push(parser, true);
15488 parse_arguments(parser, arguments, accepts_block, PM_TOKEN_PARENTHESIS_RIGHT, (uint16_t) (depth + 1));
15489
15490 if (!accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
15491 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_ARGUMENT_TERM_PAREN, pm_token_type_human(parser->current.type));
15492 parser->previous.start = parser->previous.end;
15493 parser->previous.type = PM_TOKEN_MISSING;
15494 }
15495
15496 pm_accepts_block_stack_pop(parser);
15497 arguments->closing_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
15498 }
15499 } else if (accepts_command_call && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR, PM_TOKEN_UAMPERSAND)) && !match1(parser, PM_TOKEN_BRACE_LEFT)) {
15500 found |= true;
15501 pm_accepts_block_stack_push(parser, false);
15502
15503 // If we get here, then the subsequent token cannot be used as an infix
15504 // operator. In this case we assume the subsequent token is part of an
15505 // argument to this method call.
15506 parse_arguments(parser, arguments, accepts_block, PM_TOKEN_EOF, (uint16_t) (depth + 1));
15507
15508 // If we have done with the arguments and still not consumed the comma,
15509 // then we have a trailing comma where we need to check whether it is
15510 // allowed or not.
15511 if (parser->previous.type == PM_TOKEN_COMMA && !match1(parser, PM_TOKEN_SEMICOLON)) {
15512 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, PM_ERR_EXPECT_ARGUMENT, pm_token_type_human(parser->current.type));
15513 }
15514
15515 pm_accepts_block_stack_pop(parser);
15516 }
15517
15518 // If we're at the end of the arguments, we can now check if there is a block
15519 // node that starts with a {. If there is, then we can parse it and add it to
15520 // the arguments.
15521 if (accepts_block) {
15522 pm_block_node_t *block = NULL;
15523
15524 if (accept1(parser, PM_TOKEN_BRACE_LEFT)) {
15525 found |= true;
15526 block = parse_block(parser, (uint16_t) (depth + 1));
15527 pm_arguments_validate_block(parser, arguments, block);
15528 } else if (pm_accepts_block_stack_p(parser) && accept1(parser, PM_TOKEN_KEYWORD_DO)) {
15529 found |= true;
15530 block = parse_block(parser, (uint16_t) (depth + 1));
15531 }
15532
15533 if (block != NULL) {
15534 if (arguments->block == NULL && !arguments->has_forwarding) {
15535 arguments->block = (pm_node_t *) block;
15536 } else {
15537 pm_parser_err_node(parser, (pm_node_t *) block, PM_ERR_ARGUMENT_BLOCK_MULTI);
15538
15539 if (arguments->block != NULL) {
15540 if (arguments->arguments == NULL) {
15541 arguments->arguments = pm_arguments_node_create(parser);
15542 }
15543 pm_arguments_node_arguments_append(arguments->arguments, arguments->block);
15544 }
15545 arguments->block = (pm_node_t *) block;
15546 }
15547 }
15548 }
15549
15550 return found;
15551}
15552
15557static void
15558parse_return(pm_parser_t *parser, pm_node_t *node) {
15559 bool in_sclass = false;
15560 for (pm_context_node_t *context_node = parser->current_context; context_node != NULL; context_node = context_node->prev) {
15561 switch (context_node->context) {
15565 case PM_CONTEXT_BEGIN:
15566 case PM_CONTEXT_CASE_IN:
15569 case PM_CONTEXT_DEFINED:
15570 case PM_CONTEXT_ELSE:
15571 case PM_CONTEXT_ELSIF:
15572 case PM_CONTEXT_EMBEXPR:
15574 case PM_CONTEXT_FOR:
15575 case PM_CONTEXT_IF:
15577 case PM_CONTEXT_MAIN:
15579 case PM_CONTEXT_PARENS:
15580 case PM_CONTEXT_POSTEXE:
15582 case PM_CONTEXT_PREEXE:
15584 case PM_CONTEXT_TERNARY:
15585 case PM_CONTEXT_UNLESS:
15586 case PM_CONTEXT_UNTIL:
15587 case PM_CONTEXT_WHILE:
15588 // Keep iterating up the lists of contexts, because returns can
15589 // see through these.
15590 continue;
15594 case PM_CONTEXT_SCLASS:
15595 in_sclass = true;
15596 continue;
15600 case PM_CONTEXT_CLASS:
15604 case PM_CONTEXT_MODULE:
15605 // These contexts are invalid for a return.
15606 pm_parser_err_node(parser, node, PM_ERR_RETURN_INVALID);
15607 return;
15617 case PM_CONTEXT_DEF:
15623 // These contexts are valid for a return, and we should not
15624 // continue to loop.
15625 return;
15626 case PM_CONTEXT_NONE:
15627 // This case should never happen.
15628 assert(false && "unreachable");
15629 break;
15630 }
15631 }
15632 if (in_sclass) {
15633 pm_parser_err_node(parser, node, PM_ERR_RETURN_INVALID);
15634 }
15635}
15636
15641static void
15642parse_block_exit(pm_parser_t *parser, pm_node_t *node) {
15643 for (pm_context_node_t *context_node = parser->current_context; context_node != NULL; context_node = context_node->prev) {
15644 switch (context_node->context) {
15650 case PM_CONTEXT_DEFINED:
15651 case PM_CONTEXT_FOR:
15658 case PM_CONTEXT_POSTEXE:
15659 case PM_CONTEXT_UNTIL:
15660 case PM_CONTEXT_WHILE:
15661 // These are the good cases. We're allowed to have a block exit
15662 // in these contexts.
15663 return;
15664 case PM_CONTEXT_DEF:
15669 case PM_CONTEXT_MAIN:
15670 case PM_CONTEXT_PREEXE:
15671 case PM_CONTEXT_SCLASS:
15675 // These are the bad cases. We're not allowed to have a block
15676 // exit in these contexts.
15677 //
15678 // If we get here, then we're about to mark this block exit
15679 // as invalid. However, it could later _become_ valid if we
15680 // find a trailing while/until on the expression. In this
15681 // case instead of adding the error here, we'll add the
15682 // block exit to the list of exits for the expression, and
15683 // the node parsing will handle validating it instead.
15684 assert(parser->current_block_exits != NULL);
15685 pm_node_list_append(parser->current_block_exits, node);
15686 return;
15690 case PM_CONTEXT_BEGIN:
15691 case PM_CONTEXT_CASE_IN:
15696 case PM_CONTEXT_CLASS:
15698 case PM_CONTEXT_ELSE:
15699 case PM_CONTEXT_ELSIF:
15700 case PM_CONTEXT_EMBEXPR:
15702 case PM_CONTEXT_IF:
15706 case PM_CONTEXT_MODULE:
15708 case PM_CONTEXT_PARENS:
15711 case PM_CONTEXT_TERNARY:
15712 case PM_CONTEXT_UNLESS:
15713 // In these contexts we should continue walking up the list of
15714 // contexts.
15715 break;
15716 case PM_CONTEXT_NONE:
15717 // This case should never happen.
15718 assert(false && "unreachable");
15719 break;
15720 }
15721 }
15722}
15723
15728static pm_node_list_t *
15729push_block_exits(pm_parser_t *parser, pm_node_list_t *current_block_exits) {
15730 pm_node_list_t *previous_block_exits = parser->current_block_exits;
15731 parser->current_block_exits = current_block_exits;
15732 return previous_block_exits;
15733}
15734
15740static void
15741flush_block_exits(pm_parser_t *parser, pm_node_list_t *previous_block_exits) {
15742 pm_node_t *block_exit;
15743 PM_NODE_LIST_FOREACH(parser->current_block_exits, index, block_exit) {
15744 const char *type;
15745
15746 switch (PM_NODE_TYPE(block_exit)) {
15747 case PM_BREAK_NODE: type = "break"; break;
15748 case PM_NEXT_NODE: type = "next"; break;
15749 case PM_REDO_NODE: type = "redo"; break;
15750 default: assert(false && "unreachable"); type = ""; break;
15751 }
15752
15753 PM_PARSER_ERR_NODE_FORMAT(parser, block_exit, PM_ERR_INVALID_BLOCK_EXIT, type);
15754 }
15755
15756 parser->current_block_exits = previous_block_exits;
15757}
15758
15763static void
15764pop_block_exits(pm_parser_t *parser, pm_node_list_t *previous_block_exits) {
15766 // If we matched a trailing while/until, then all of the block exits in
15767 // the contained list are valid. In this case we do not need to do
15768 // anything.
15769 parser->current_block_exits = previous_block_exits;
15770 } else if (previous_block_exits != NULL) {
15771 // If we did not matching a trailing while/until, then all of the block
15772 // exits contained in the list are invalid for this specific context.
15773 // However, they could still become valid in a higher level context if
15774 // there is another list above this one. In this case we'll push all of
15775 // the block exits up to the previous list.
15776 pm_node_list_concat(previous_block_exits, parser->current_block_exits);
15777 parser->current_block_exits = previous_block_exits;
15778 } else {
15779 // If we did not match a trailing while/until and this was the last
15780 // chance to do so, then all of the block exits in the list are invalid
15781 // and we need to add an error for each of them.
15782 flush_block_exits(parser, previous_block_exits);
15783 }
15784}
15785
15786static inline pm_node_t *
15787parse_predicate(pm_parser_t *parser, pm_binding_power_t binding_power, pm_context_t context, pm_token_t *then_keyword, uint16_t depth) {
15788 context_push(parser, PM_CONTEXT_PREDICATE);
15789 pm_diagnostic_id_t error_id = context == PM_CONTEXT_IF ? PM_ERR_CONDITIONAL_IF_PREDICATE : PM_ERR_CONDITIONAL_UNLESS_PREDICATE;
15790 pm_node_t *predicate = parse_value_expression(parser, binding_power, true, false, error_id, (uint16_t) (depth + 1));
15791
15792 // Predicates are closed by a term, a "then", or a term and then a "then".
15793 bool predicate_closed = accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
15794
15795 if (accept1(parser, PM_TOKEN_KEYWORD_THEN)) {
15796 predicate_closed = true;
15797 *then_keyword = parser->previous;
15798 }
15799
15800 if (!predicate_closed) {
15801 pm_parser_err_current(parser, PM_ERR_CONDITIONAL_PREDICATE_TERM);
15802 }
15803
15804 context_pop(parser);
15805 return predicate;
15806}
15807
15808static inline pm_node_t *
15809parse_conditional(pm_parser_t *parser, pm_context_t context, size_t opening_newline_index, bool if_after_else, uint16_t depth) {
15810 pm_node_list_t current_block_exits = { 0 };
15811 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
15812
15813 pm_token_t keyword = parser->previous;
15814 pm_token_t then_keyword = not_provided(parser);
15815
15816 pm_node_t *predicate = parse_predicate(parser, PM_BINDING_POWER_MODIFIER, context, &then_keyword, (uint16_t) (depth + 1));
15817 pm_statements_node_t *statements = NULL;
15818
15820 pm_accepts_block_stack_push(parser, true);
15821 statements = parse_statements(parser, context, (uint16_t) (depth + 1));
15822 pm_accepts_block_stack_pop(parser);
15823 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
15824 }
15825
15826 pm_token_t end_keyword = not_provided(parser);
15827 pm_node_t *parent = NULL;
15828
15829 switch (context) {
15830 case PM_CONTEXT_IF:
15831 parent = (pm_node_t *) pm_if_node_create(parser, &keyword, predicate, &then_keyword, statements, NULL, &end_keyword);
15832 break;
15833 case PM_CONTEXT_UNLESS:
15834 parent = (pm_node_t *) pm_unless_node_create(parser, &keyword, predicate, &then_keyword, statements);
15835 break;
15836 default:
15837 assert(false && "unreachable");
15838 break;
15839 }
15840
15841 pm_node_t *current = parent;
15842
15843 // Parse any number of elsif clauses. This will form a linked list of if
15844 // nodes pointing to each other from the top.
15845 if (context == PM_CONTEXT_IF) {
15846 while (match1(parser, PM_TOKEN_KEYWORD_ELSIF)) {
15847 if (parser_end_of_line_p(parser)) {
15848 PM_PARSER_WARN_TOKEN_FORMAT_CONTENT(parser, parser->current, PM_WARN_KEYWORD_EOL);
15849 }
15850
15851 parser_warn_indentation_mismatch(parser, opening_newline_index, &keyword, false, false);
15852 pm_token_t elsif_keyword = parser->current;
15853 parser_lex(parser);
15854
15855 pm_node_t *predicate = parse_predicate(parser, PM_BINDING_POWER_MODIFIER, PM_CONTEXT_ELSIF, &then_keyword, (uint16_t) (depth + 1));
15856 pm_accepts_block_stack_push(parser, true);
15857
15858 pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_ELSIF, (uint16_t) (depth + 1));
15859 pm_accepts_block_stack_pop(parser);
15860 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
15861
15862 pm_node_t *elsif = (pm_node_t *) pm_if_node_create(parser, &elsif_keyword, predicate, &then_keyword, statements, NULL, &end_keyword);
15863 ((pm_if_node_t *) current)->subsequent = elsif;
15864 current = elsif;
15865 }
15866 }
15867
15868 if (match1(parser, PM_TOKEN_KEYWORD_ELSE)) {
15869 parser_warn_indentation_mismatch(parser, opening_newline_index, &keyword, false, false);
15870 opening_newline_index = token_newline_index(parser);
15871
15872 parser_lex(parser);
15873 pm_token_t else_keyword = parser->previous;
15874
15875 pm_accepts_block_stack_push(parser, true);
15876 pm_statements_node_t *else_statements = parse_statements(parser, PM_CONTEXT_ELSE, (uint16_t) (depth + 1));
15877 pm_accepts_block_stack_pop(parser);
15878
15879 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
15880 parser_warn_indentation_mismatch(parser, opening_newline_index, &else_keyword, false, false);
15881 expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CONDITIONAL_TERM_ELSE);
15882
15883 pm_else_node_t *else_node = pm_else_node_create(parser, &else_keyword, else_statements, &parser->previous);
15884
15885 switch (context) {
15886 case PM_CONTEXT_IF:
15887 ((pm_if_node_t *) current)->subsequent = (pm_node_t *) else_node;
15888 break;
15889 case PM_CONTEXT_UNLESS:
15890 ((pm_unless_node_t *) parent)->else_clause = else_node;
15891 break;
15892 default:
15893 assert(false && "unreachable");
15894 break;
15895 }
15896 } else {
15897 parser_warn_indentation_mismatch(parser, opening_newline_index, &keyword, if_after_else, false);
15898 expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CONDITIONAL_TERM);
15899 }
15900
15901 // Set the appropriate end location for all of the nodes in the subtree.
15902 switch (context) {
15903 case PM_CONTEXT_IF: {
15904 pm_node_t *current = parent;
15905 bool recursing = true;
15906
15907 while (recursing) {
15908 switch (PM_NODE_TYPE(current)) {
15909 case PM_IF_NODE:
15910 pm_if_node_end_keyword_loc_set((pm_if_node_t *) current, &parser->previous);
15911 current = ((pm_if_node_t *) current)->subsequent;
15912 recursing = current != NULL;
15913 break;
15914 case PM_ELSE_NODE:
15915 pm_else_node_end_keyword_loc_set((pm_else_node_t *) current, &parser->previous);
15916 recursing = false;
15917 break;
15918 default: {
15919 recursing = false;
15920 break;
15921 }
15922 }
15923 }
15924 break;
15925 }
15926 case PM_CONTEXT_UNLESS:
15927 pm_unless_node_end_keyword_loc_set((pm_unless_node_t *) parent, &parser->previous);
15928 break;
15929 default:
15930 assert(false && "unreachable");
15931 break;
15932 }
15933
15934 pop_block_exits(parser, previous_block_exits);
15935 pm_node_list_free(&current_block_exits);
15936
15937 return parent;
15938}
15939
/**
 * Expands to the case labels for every keyword token. It is written to be
 * used as `case PM_CASE_KEYWORD:` inside a switch statement: the leading
 * `case` and the trailing `:` are supplied at the point of use.
 */
#define PM_CASE_KEYWORD \
    PM_TOKEN_KEYWORD___ENCODING__: \
    case PM_TOKEN_KEYWORD___FILE__: \
    case PM_TOKEN_KEYWORD___LINE__: \
    case PM_TOKEN_KEYWORD_ALIAS: \
    case PM_TOKEN_KEYWORD_AND: \
    case PM_TOKEN_KEYWORD_BEGIN: \
    case PM_TOKEN_KEYWORD_BEGIN_UPCASE: \
    case PM_TOKEN_KEYWORD_BREAK: \
    case PM_TOKEN_KEYWORD_CASE: \
    case PM_TOKEN_KEYWORD_CLASS: \
    case PM_TOKEN_KEYWORD_DEF: \
    case PM_TOKEN_KEYWORD_DEFINED: \
    case PM_TOKEN_KEYWORD_DO: \
    case PM_TOKEN_KEYWORD_DO_LOOP: \
    case PM_TOKEN_KEYWORD_ELSE: \
    case PM_TOKEN_KEYWORD_ELSIF: \
    case PM_TOKEN_KEYWORD_END: \
    case PM_TOKEN_KEYWORD_END_UPCASE: \
    case PM_TOKEN_KEYWORD_ENSURE: \
    case PM_TOKEN_KEYWORD_FALSE: \
    case PM_TOKEN_KEYWORD_FOR: \
    case PM_TOKEN_KEYWORD_IF: \
    case PM_TOKEN_KEYWORD_IN: \
    case PM_TOKEN_KEYWORD_MODULE: \
    case PM_TOKEN_KEYWORD_NEXT: \
    case PM_TOKEN_KEYWORD_NIL: \
    case PM_TOKEN_KEYWORD_NOT: \
    case PM_TOKEN_KEYWORD_OR: \
    case PM_TOKEN_KEYWORD_REDO: \
    case PM_TOKEN_KEYWORD_RESCUE: \
    case PM_TOKEN_KEYWORD_RETRY: \
    case PM_TOKEN_KEYWORD_RETURN: \
    case PM_TOKEN_KEYWORD_SELF: \
    case PM_TOKEN_KEYWORD_SUPER: \
    case PM_TOKEN_KEYWORD_THEN: \
    case PM_TOKEN_KEYWORD_TRUE: \
    case PM_TOKEN_KEYWORD_UNDEF: \
    case PM_TOKEN_KEYWORD_UNLESS: \
    case PM_TOKEN_KEYWORD_UNTIL: \
    case PM_TOKEN_KEYWORD_WHEN: \
    case PM_TOKEN_KEYWORD_WHILE: \
    case PM_TOKEN_KEYWORD_YIELD
15955
/**
 * Expands to the case labels for every operator token listed below. It is
 * written to be used as `case PM_CASE_OPERATOR:` inside a switch statement:
 * the leading `case` and the trailing `:` are supplied at the point of use.
 */
#define PM_CASE_OPERATOR \
    PM_TOKEN_AMPERSAND: \
    case PM_TOKEN_BACKTICK: \
    case PM_TOKEN_BANG_EQUAL: \
    case PM_TOKEN_BANG_TILDE: \
    case PM_TOKEN_BANG: \
    case PM_TOKEN_BRACKET_LEFT_RIGHT_EQUAL: \
    case PM_TOKEN_BRACKET_LEFT_RIGHT: \
    case PM_TOKEN_CARET: \
    case PM_TOKEN_EQUAL_EQUAL_EQUAL: \
    case PM_TOKEN_EQUAL_EQUAL: \
    case PM_TOKEN_EQUAL_TILDE: \
    case PM_TOKEN_GREATER_EQUAL: \
    case PM_TOKEN_GREATER_GREATER: \
    case PM_TOKEN_GREATER: \
    case PM_TOKEN_LESS_EQUAL_GREATER: \
    case PM_TOKEN_LESS_EQUAL: \
    case PM_TOKEN_LESS_LESS: \
    case PM_TOKEN_LESS: \
    case PM_TOKEN_MINUS: \
    case PM_TOKEN_PERCENT: \
    case PM_TOKEN_PIPE: \
    case PM_TOKEN_PLUS: \
    case PM_TOKEN_SLASH: \
    case PM_TOKEN_STAR_STAR: \
    case PM_TOKEN_STAR: \
    case PM_TOKEN_TILDE: \
    case PM_TOKEN_UAMPERSAND: \
    case PM_TOKEN_UMINUS: \
    case PM_TOKEN_UMINUS_NUM: \
    case PM_TOKEN_UPLUS: \
    case PM_TOKEN_USTAR: \
    case PM_TOKEN_USTAR_STAR
15968
/**
 * Expands to the case labels for the tokens listed below: numeric literals,
 * the opening tokens of symbols, regular expressions, strings, `%` literals
 * and heredocs, the keyword literals, `->`, and character literals. It is
 * written to be used as `case PM_CASE_PRIMITIVE:` inside a switch statement:
 * the leading `case` and the trailing `:` are supplied at the point of use.
 */
#define PM_CASE_PRIMITIVE \
    PM_TOKEN_INTEGER: \
    case PM_TOKEN_INTEGER_IMAGINARY: \
    case PM_TOKEN_INTEGER_RATIONAL: \
    case PM_TOKEN_INTEGER_RATIONAL_IMAGINARY: \
    case PM_TOKEN_FLOAT: \
    case PM_TOKEN_FLOAT_IMAGINARY: \
    case PM_TOKEN_FLOAT_RATIONAL: \
    case PM_TOKEN_FLOAT_RATIONAL_IMAGINARY: \
    case PM_TOKEN_SYMBOL_BEGIN: \
    case PM_TOKEN_REGEXP_BEGIN: \
    case PM_TOKEN_BACKTICK: \
    case PM_TOKEN_PERCENT_LOWER_X: \
    case PM_TOKEN_PERCENT_LOWER_I: \
    case PM_TOKEN_PERCENT_LOWER_W: \
    case PM_TOKEN_PERCENT_UPPER_I: \
    case PM_TOKEN_PERCENT_UPPER_W: \
    case PM_TOKEN_STRING_BEGIN: \
    case PM_TOKEN_KEYWORD_NIL: \
    case PM_TOKEN_KEYWORD_SELF: \
    case PM_TOKEN_KEYWORD_TRUE: \
    case PM_TOKEN_KEYWORD_FALSE: \
    case PM_TOKEN_KEYWORD___FILE__: \
    case PM_TOKEN_KEYWORD___LINE__: \
    case PM_TOKEN_KEYWORD___ENCODING__: \
    case PM_TOKEN_MINUS_GREATER: \
    case PM_TOKEN_HEREDOC_START: \
    case PM_TOKEN_UMINUS_NUM: \
    case PM_TOKEN_CHARACTER_LITERAL
15983
/**
 * Label list for the token types this file groups under "parameter": the
 * `&`, `*`, `**` and `...` markers (in both their plain and unary token
 * forms where listed), identifiers, labels, constants and
 * instance/global/class variables. Written at the use site as
 * `case PM_CASE_PARAMETER:`.
 */
#define PM_CASE_PARAMETER \
    PM_TOKEN_UAMPERSAND: case PM_TOKEN_AMPERSAND: \
    case PM_TOKEN_UDOT_DOT_DOT: \
    case PM_TOKEN_IDENTIFIER: case PM_TOKEN_LABEL: \
    case PM_TOKEN_USTAR: case PM_TOKEN_STAR: \
    case PM_TOKEN_STAR_STAR: case PM_TOKEN_USTAR_STAR: \
    case PM_TOKEN_CONSTANT: \
    case PM_TOKEN_INSTANCE_VARIABLE: case PM_TOKEN_GLOBAL_VARIABLE: case PM_TOKEN_CLASS_VARIABLE
15992
/**
 * Label list for the node types this file groups as "writable": the variable
 * and constant read nodes, constant paths, multi-target nodes, and the
 * back-reference / numbered-reference / `it` read nodes. Unlike the token
 * macros above, the labels here are node types. Written at the use site as
 * `case PM_CASE_WRITABLE:`.
 */
#define PM_CASE_WRITABLE \
    PM_CLASS_VARIABLE_READ_NODE: \
    case PM_CONSTANT_PATH_NODE: case PM_CONSTANT_READ_NODE: \
    case PM_GLOBAL_VARIABLE_READ_NODE: case PM_LOCAL_VARIABLE_READ_NODE: \
    case PM_INSTANCE_VARIABLE_READ_NODE: \
    case PM_MULTI_TARGET_NODE: \
    case PM_BACK_REFERENCE_READ_NODE: case PM_NUMBERED_REFERENCE_READ_NODE: \
    case PM_IT_LOCAL_VARIABLE_READ_NODE
16001
16002// Assert here that the flags are the same so that we can safely switch the type
16003// of the node without having to move the flags.
16004PM_STATIC_ASSERT(__LINE__, ((int) PM_STRING_FLAGS_FORCED_UTF8_ENCODING) == ((int) PM_ENCODING_FLAGS_FORCED_UTF8_ENCODING), "Expected the flags to match.");
16005
16010static inline pm_node_flags_t
16011parse_unescaped_encoding(const pm_parser_t *parser) {
16012 if (parser->explicit_encoding != NULL) {
16014 // If the there's an explicit encoding and it's using a UTF-8 escape
16015 // sequence, then mark the string as UTF-8.
16017 } else if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) {
16018 // If there's a non-UTF-8 escape sequence being used, then the
16019 // string uses the source encoding, unless the source is marked as
16020 // US-ASCII. In that case the string is forced as ASCII-8BIT in
16021 // order to keep the string valid.
16023 }
16024 }
16025 return 0;
16026}
16027
16032static pm_node_t *
16033parse_string_part(pm_parser_t *parser, uint16_t depth) {
16034 switch (parser->current.type) {
16035 // Here the lexer has returned to us plain string content. In this case
16036 // we'll create a string node that has no opening or closing and return that
16037 // as the part. These kinds of parts look like:
16038 //
16039 // "aaa #{bbb} #@ccc ddd"
16040 // ^^^^ ^ ^^^^
16042 pm_token_t opening = not_provided(parser);
16043 pm_token_t closing = not_provided(parser);
16044
16045 pm_node_t *node = (pm_node_t *) pm_string_node_create_current_string(parser, &opening, &parser->current, &closing);
16046 pm_node_flag_set(node, parse_unescaped_encoding(parser));
16047
16048 parser_lex(parser);
16049 return node;
16050 }
16051 // Here the lexer has returned the beginning of an embedded expression. In
16052 // that case we'll parse the inner statements and return that as the part.
16053 // These kinds of parts look like:
16054 //
16055 // "aaa #{bbb} #@ccc ddd"
16056 // ^^^^^^
16058 // Ruby disallows seeing encoding around interpolation in strings,
16059 // even though it is known at parse time.
16060 parser->explicit_encoding = NULL;
16061
16062 pm_lex_state_t state = parser->lex_state;
16063 int brace_nesting = parser->brace_nesting;
16064
16065 parser->brace_nesting = 0;
16066 lex_state_set(parser, PM_LEX_STATE_BEG);
16067 parser_lex(parser);
16068
16069 pm_token_t opening = parser->previous;
16070 pm_statements_node_t *statements = NULL;
16071
16072 if (!match1(parser, PM_TOKEN_EMBEXPR_END)) {
16073 pm_accepts_block_stack_push(parser, true);
16074 statements = parse_statements(parser, PM_CONTEXT_EMBEXPR, (uint16_t) (depth + 1));
16075 pm_accepts_block_stack_pop(parser);
16076 }
16077
16078 parser->brace_nesting = brace_nesting;
16079 lex_state_set(parser, state);
16080
16081 expect1(parser, PM_TOKEN_EMBEXPR_END, PM_ERR_EMBEXPR_END);
16082 pm_token_t closing = parser->previous;
16083
16084 // If this set of embedded statements only contains a single
16085 // statement, then Ruby does not consider it as a possible statement
16086 // that could emit a line event.
16087 if (statements != NULL && statements->body.size == 1) {
16088 pm_node_flag_unset(statements->body.nodes[0], PM_NODE_FLAG_NEWLINE);
16089 }
16090
16091 return (pm_node_t *) pm_embedded_statements_node_create(parser, &opening, statements, &closing);
16092 }
16093
16094 // Here the lexer has returned the beginning of an embedded variable.
16095 // In that case we'll parse the variable and create an appropriate node
16096 // for it and then return that node. These kinds of parts look like:
16097 //
16098 // "aaa #{bbb} #@ccc ddd"
16099 // ^^^^^
16100 case PM_TOKEN_EMBVAR: {
16101 // Ruby disallows seeing encoding around interpolation in strings,
16102 // even though it is known at parse time.
16103 parser->explicit_encoding = NULL;
16104
16105 lex_state_set(parser, PM_LEX_STATE_BEG);
16106 parser_lex(parser);
16107
16108 pm_token_t operator = parser->previous;
16109 pm_node_t *variable;
16110
16111 switch (parser->current.type) {
16112 // In this case a back reference is being interpolated. We'll
16113 // create a global variable read node.
16115 parser_lex(parser);
16116 variable = (pm_node_t *) pm_back_reference_read_node_create(parser, &parser->previous);
16117 break;
16118 // In this case an nth reference is being interpolated. We'll
16119 // create a global variable read node.
16121 parser_lex(parser);
16122 variable = (pm_node_t *) pm_numbered_reference_read_node_create(parser, &parser->previous);
16123 break;
16124 // In this case a global variable is being interpolated. We'll
16125 // create a global variable read node.
16127 parser_lex(parser);
16128 variable = (pm_node_t *) pm_global_variable_read_node_create(parser, &parser->previous);
16129 break;
16130 // In this case an instance variable is being interpolated.
16131 // We'll create an instance variable read node.
16133 parser_lex(parser);
16134 variable = (pm_node_t *) pm_instance_variable_read_node_create(parser, &parser->previous);
16135 break;
16136 // In this case a class variable is being interpolated. We'll
16137 // create a class variable read node.
16139 parser_lex(parser);
16140 variable = (pm_node_t *) pm_class_variable_read_node_create(parser, &parser->previous);
16141 break;
16142 // We can hit here if we got an invalid token. In that case
16143 // we'll not attempt to lex this token and instead just return a
16144 // missing node.
16145 default:
16146 expect1(parser, PM_TOKEN_IDENTIFIER, PM_ERR_EMBVAR_INVALID);
16147 variable = (pm_node_t *) pm_missing_node_create(parser, parser->current.start, parser->current.end);
16148 break;
16149 }
16150
16151 return (pm_node_t *) pm_embedded_variable_node_create(parser, &operator, variable);
16152 }
16153 default:
16154 parser_lex(parser);
16155 pm_parser_err_previous(parser, PM_ERR_CANNOT_PARSE_STRING_PART);
16156 return NULL;
16157 }
16158}
16159
16165static const uint8_t *
16166parse_operator_symbol_name(const pm_token_t *name) {
16167 switch (name->type) {
16168 case PM_TOKEN_TILDE:
16169 case PM_TOKEN_BANG:
16170 if (name->end[-1] == '@') return name->end - 1;
16171 /* fallthrough */
16172 default:
16173 return name->end;
16174 }
16175}
16176
16177static pm_node_t *
16178parse_operator_symbol(pm_parser_t *parser, const pm_token_t *opening, pm_lex_state_t next_state) {
16179 pm_token_t closing = not_provided(parser);
16180 pm_symbol_node_t *symbol = pm_symbol_node_create(parser, opening, &parser->current, &closing);
16181
16182 const uint8_t *end = parse_operator_symbol_name(&parser->current);
16183
16184 if (next_state != PM_LEX_STATE_NONE) lex_state_set(parser, next_state);
16185 parser_lex(parser);
16186
16187 pm_string_shared_init(&symbol->unescaped, parser->previous.start, end);
16188 pm_node_flag_set((pm_node_t *) symbol, PM_SYMBOL_FLAGS_FORCED_US_ASCII_ENCODING);
16189
16190 return (pm_node_t *) symbol;
16191}
16192
16198static pm_node_t *
16199parse_symbol(pm_parser_t *parser, pm_lex_mode_t *lex_mode, pm_lex_state_t next_state, uint16_t depth) {
16200 const pm_token_t opening = parser->previous;
16201
16202 if (lex_mode->mode != PM_LEX_STRING) {
16203 if (next_state != PM_LEX_STATE_NONE) lex_state_set(parser, next_state);
16204
16205 switch (parser->current.type) {
16206 case PM_CASE_OPERATOR:
16207 return parse_operator_symbol(parser, &opening, next_state == PM_LEX_STATE_NONE ? PM_LEX_STATE_ENDFN : next_state);
16209 case PM_TOKEN_CONSTANT:
16216 case PM_CASE_KEYWORD:
16217 parser_lex(parser);
16218 break;
16219 default:
16220 expect2(parser, PM_TOKEN_IDENTIFIER, PM_TOKEN_METHOD_NAME, PM_ERR_SYMBOL_INVALID);
16221 break;
16222 }
16223
16224 pm_token_t closing = not_provided(parser);
16225 pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &parser->previous, &closing);
16226
16227 pm_string_shared_init(&symbol->unescaped, parser->previous.start, parser->previous.end);
16228 pm_node_flag_set((pm_node_t *) symbol, parse_symbol_encoding(parser, &parser->previous, &symbol->unescaped, false));
16229
16230 return (pm_node_t *) symbol;
16231 }
16232
16233 if (lex_mode->as.string.interpolation) {
16234 // If we have the end of the symbol, then we can return an empty symbol.
16235 if (match1(parser, PM_TOKEN_STRING_END)) {
16236 if (next_state != PM_LEX_STATE_NONE) lex_state_set(parser, next_state);
16237 parser_lex(parser);
16238
16239 pm_token_t content = not_provided(parser);
16240 pm_token_t closing = parser->previous;
16241 return (pm_node_t *) pm_symbol_node_create(parser, &opening, &content, &closing);
16242 }
16243
16244 // Now we can parse the first part of the symbol.
16245 pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1));
16246
16247 // If we got a string part, then it's possible that we could transform
16248 // what looks like an interpolated symbol into a regular symbol.
16249 if (part && PM_NODE_TYPE_P(part, PM_STRING_NODE) && match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
16250 if (next_state != PM_LEX_STATE_NONE) lex_state_set(parser, next_state);
16251 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_SYMBOL_TERM_INTERPOLATED);
16252
16253 return (pm_node_t *) pm_string_node_to_symbol_node(parser, (pm_string_node_t *) part, &opening, &parser->previous);
16254 }
16255
16256 pm_interpolated_symbol_node_t *symbol = pm_interpolated_symbol_node_create(parser, &opening, NULL, &opening);
16257 if (part) pm_interpolated_symbol_node_append(symbol, part);
16258
16259 while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
16260 if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) {
16261 pm_interpolated_symbol_node_append(symbol, part);
16262 }
16263 }
16264
16265 if (next_state != PM_LEX_STATE_NONE) lex_state_set(parser, next_state);
16266 if (match1(parser, PM_TOKEN_EOF)) {
16267 pm_parser_err_token(parser, &opening, PM_ERR_SYMBOL_TERM_INTERPOLATED);
16268 } else {
16269 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_SYMBOL_TERM_INTERPOLATED);
16270 }
16271
16272 pm_interpolated_symbol_node_closing_loc_set(symbol, &parser->previous);
16273 return (pm_node_t *) symbol;
16274 }
16275
16276 pm_token_t content;
16277 pm_string_t unescaped;
16278
16279 if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
16280 content = parser->current;
16281 unescaped = parser->current_string;
16282 parser_lex(parser);
16283
16284 // If we have two string contents in a row, then the content of this
16285 // symbol is split because of heredoc contents. This looks like:
16286 //
16287 // <<A; :'a
16288 // A
16289 // b'
16290 //
16291 // In this case, the best way we have to represent this is as an
16292 // interpolated string node, so that's what we'll do here.
16293 if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
16294 pm_interpolated_symbol_node_t *symbol = pm_interpolated_symbol_node_create(parser, &opening, NULL, &opening);
16295 pm_token_t bounds = not_provided(parser);
16296
16297 pm_node_t *part = (pm_node_t *) pm_string_node_create_unescaped(parser, &bounds, &content, &bounds, &unescaped);
16298 pm_interpolated_symbol_node_append(symbol, part);
16299
16300 part = (pm_node_t *) pm_string_node_create_unescaped(parser, &bounds, &parser->current, &bounds, &parser->current_string);
16301 pm_interpolated_symbol_node_append(symbol, part);
16302
16303 if (next_state != PM_LEX_STATE_NONE) {
16304 lex_state_set(parser, next_state);
16305 }
16306
16307 parser_lex(parser);
16308 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_SYMBOL_TERM_DYNAMIC);
16309
16310 pm_interpolated_symbol_node_closing_loc_set(symbol, &parser->previous);
16311 return (pm_node_t *) symbol;
16312 }
16313 } else {
16314 content = (pm_token_t) { .type = PM_TOKEN_STRING_CONTENT, .start = parser->previous.end, .end = parser->previous.end };
16315 pm_string_shared_init(&unescaped, content.start, content.end);
16316 }
16317
16318 if (next_state != PM_LEX_STATE_NONE) {
16319 lex_state_set(parser, next_state);
16320 }
16321
16322 if (match1(parser, PM_TOKEN_EOF)) {
16323 pm_parser_err_token(parser, &opening, PM_ERR_SYMBOL_TERM_DYNAMIC);
16324 } else {
16325 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_SYMBOL_TERM_DYNAMIC);
16326 }
16327
16328 return (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &content, &unescaped, false));
16329}
16330
16335static inline pm_node_t *
16336parse_undef_argument(pm_parser_t *parser, uint16_t depth) {
16337 switch (parser->current.type) {
16338 case PM_CASE_OPERATOR: {
16339 const pm_token_t opening = not_provided(parser);
16340 return parse_operator_symbol(parser, &opening, PM_LEX_STATE_NONE);
16341 }
16342 case PM_CASE_KEYWORD:
16343 case PM_TOKEN_CONSTANT:
16345 case PM_TOKEN_METHOD_NAME: {
16346 parser_lex(parser);
16347
16348 pm_token_t opening = not_provided(parser);
16349 pm_token_t closing = not_provided(parser);
16350 pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &parser->previous, &closing);
16351
16352 pm_string_shared_init(&symbol->unescaped, parser->previous.start, parser->previous.end);
16353 pm_node_flag_set((pm_node_t *) symbol, parse_symbol_encoding(parser, &parser->previous, &symbol->unescaped, false));
16354
16355 return (pm_node_t *) symbol;
16356 }
16357 case PM_TOKEN_SYMBOL_BEGIN: {
16358 pm_lex_mode_t lex_mode = *parser->lex_modes.current;
16359 parser_lex(parser);
16360
16361 return parse_symbol(parser, &lex_mode, PM_LEX_STATE_NONE, (uint16_t) (depth + 1));
16362 }
16363 default:
16364 pm_parser_err_current(parser, PM_ERR_UNDEF_ARGUMENT);
16365 return (pm_node_t *) pm_missing_node_create(parser, parser->current.start, parser->current.end);
16366 }
16367}
16368
16375static inline pm_node_t *
16376parse_alias_argument(pm_parser_t *parser, bool first, uint16_t depth) {
16377 switch (parser->current.type) {
16378 case PM_CASE_OPERATOR: {
16379 const pm_token_t opening = not_provided(parser);
16380 return parse_operator_symbol(parser, &opening, first ? PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM : PM_LEX_STATE_NONE);
16381 }
16382 case PM_CASE_KEYWORD:
16383 case PM_TOKEN_CONSTANT:
16385 case PM_TOKEN_METHOD_NAME: {
16386 if (first) lex_state_set(parser, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM);
16387 parser_lex(parser);
16388
16389 pm_token_t opening = not_provided(parser);
16390 pm_token_t closing = not_provided(parser);
16391 pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &parser->previous, &closing);
16392
16393 pm_string_shared_init(&symbol->unescaped, parser->previous.start, parser->previous.end);
16394 pm_node_flag_set((pm_node_t *) symbol, parse_symbol_encoding(parser, &parser->previous, &symbol->unescaped, false));
16395
16396 return (pm_node_t *) symbol;
16397 }
16398 case PM_TOKEN_SYMBOL_BEGIN: {
16399 pm_lex_mode_t lex_mode = *parser->lex_modes.current;
16400 parser_lex(parser);
16401
16402 return parse_symbol(parser, &lex_mode, first ? PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM : PM_LEX_STATE_NONE, (uint16_t) (depth + 1));
16403 }
16405 parser_lex(parser);
16406 return (pm_node_t *) pm_back_reference_read_node_create(parser, &parser->previous);
16408 parser_lex(parser);
16409 return (pm_node_t *) pm_numbered_reference_read_node_create(parser, &parser->previous);
16411 parser_lex(parser);
16412 return (pm_node_t *) pm_global_variable_read_node_create(parser, &parser->previous);
16413 default:
16414 pm_parser_err_current(parser, PM_ERR_ALIAS_ARGUMENT);
16415 return (pm_node_t *) pm_missing_node_create(parser, parser->current.start, parser->current.end);
16416 }
16417}
16418
16423static pm_node_t *
16424parse_variable(pm_parser_t *parser) {
16425 pm_constant_id_t name_id = pm_parser_constant_id_token(parser, &parser->previous);
16426 int depth;
16427 bool is_numbered_param = pm_token_is_numbered_parameter(parser->previous.start, parser->previous.end);
16428
16429 if (!is_numbered_param && ((depth = pm_parser_local_depth_constant_id(parser, name_id)) != -1)) {
16430 return (pm_node_t *) pm_local_variable_read_node_create_constant_id(parser, &parser->previous, name_id, (uint32_t) depth, false);
16431 }
16432
16433 pm_scope_t *current_scope = parser->current_scope;
16434 if (!current_scope->closed && !(current_scope->parameters & PM_SCOPE_PARAMETERS_IMPLICIT_DISALLOWED)) {
16435 if (is_numbered_param) {
16436 // When you use a numbered parameter, it implies the existence of
16437 // all of the locals that exist before it. For example, referencing
16438 // _2 means that _1 must exist. Therefore here we loop through all
16439 // of the possibilities and add them into the constant pool.
16440 uint8_t maximum = (uint8_t) (parser->previous.start[1] - '0');
16441 for (uint8_t number = 1; number <= maximum; number++) {
16442 pm_parser_local_add_constant(parser, pm_numbered_parameter_names[number - 1], 2);
16443 }
16444
16445 if (!match1(parser, PM_TOKEN_EQUAL)) {
16446 parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_NUMBERED_FOUND;
16447 }
16448
16449 pm_node_t *node = (pm_node_t *) pm_local_variable_read_node_create_constant_id(parser, &parser->previous, name_id, 0, false);
16450 pm_node_list_append(&current_scope->implicit_parameters, node);
16451
16452 return node;
16453 } else if ((parser->version != PM_OPTIONS_VERSION_CRUBY_3_3) && pm_token_is_it(parser->previous.start, parser->previous.end)) {
16454 pm_node_t *node = (pm_node_t *) pm_it_local_variable_read_node_create(parser, &parser->previous);
16455 pm_node_list_append(&current_scope->implicit_parameters, node);
16456
16457 return node;
16458 }
16459 }
16460
16461 return NULL;
16462}
16463
16467static pm_node_t *
16468parse_variable_call(pm_parser_t *parser) {
16469 pm_node_flags_t flags = 0;
16470
16471 if (!match1(parser, PM_TOKEN_PARENTHESIS_LEFT) && (parser->previous.end[-1] != '!') && (parser->previous.end[-1] != '?')) {
16472 pm_node_t *node = parse_variable(parser);
16473 if (node != NULL) return node;
16475 }
16476
16477 pm_call_node_t *node = pm_call_node_variable_call_create(parser, &parser->previous);
16478 pm_node_flag_set((pm_node_t *)node, flags);
16479
16480 return (pm_node_t *) node;
16481}
16482
16488static inline pm_token_t
16489parse_method_definition_name(pm_parser_t *parser) {
16490 switch (parser->current.type) {
16491 case PM_CASE_KEYWORD:
16492 case PM_TOKEN_CONSTANT:
16494 parser_lex(parser);
16495 return parser->previous;
16497 pm_refute_numbered_parameter(parser, parser->current.start, parser->current.end);
16498 parser_lex(parser);
16499 return parser->previous;
16500 case PM_CASE_OPERATOR:
16501 lex_state_set(parser, PM_LEX_STATE_ENDFN);
16502 parser_lex(parser);
16503 return parser->previous;
16504 default:
16505 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_DEF_NAME, pm_token_type_human(parser->current.type));
16506 return (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->current.start, .end = parser->current.end };
16507 }
16508}
16509
16510static void
16511parse_heredoc_dedent_string(pm_string_t *string, size_t common_whitespace) {
16512 // Get a reference to the string struct that is being held by the string
16513 // node. This is the value we're going to actually manipulate.
16514 pm_string_ensure_owned(string);
16515
16516 // Now get the bounds of the existing string. We'll use this as a
16517 // destination to move bytes into. We'll also use it for bounds checking
16518 // since we don't require that these strings be null terminated.
16519 size_t dest_length = pm_string_length(string);
16520 const uint8_t *source_cursor = (uint8_t *) string->source;
16521 const uint8_t *source_end = source_cursor + dest_length;
16522
16523 // We're going to move bytes backward in the string when we get leading
16524 // whitespace, so we'll maintain a pointer to the current position in the
16525 // string that we're writing to.
16526 size_t trimmed_whitespace = 0;
16527
16528 // While we haven't reached the amount of common whitespace that we need to
16529 // trim and we haven't reached the end of the string, we'll keep trimming
16530 // whitespace. Trimming in this context means skipping over these bytes such
16531 // that they aren't copied into the new string.
16532 while ((source_cursor < source_end) && pm_char_is_inline_whitespace(*source_cursor) && trimmed_whitespace < common_whitespace) {
16533 if (*source_cursor == '\t') {
16534 trimmed_whitespace = (trimmed_whitespace / PM_TAB_WHITESPACE_SIZE + 1) * PM_TAB_WHITESPACE_SIZE;
16535 if (trimmed_whitespace > common_whitespace) break;
16536 } else {
16537 trimmed_whitespace++;
16538 }
16539
16540 source_cursor++;
16541 dest_length--;
16542 }
16543
16544 memmove((uint8_t *) string->source, source_cursor, (size_t) (source_end - source_cursor));
16545 string->length = dest_length;
16546}
16547
16551static void
16552parse_heredoc_dedent(pm_parser_t *parser, pm_node_list_t *nodes, size_t common_whitespace) {
16553 // The next node should be dedented if it's the first node in the list or if
16554 // it follows a string node.
16555 bool dedent_next = true;
16556
16557 // Iterate over all nodes, and trim whitespace accordingly. We're going to
16558 // keep around two indices: a read and a write. If we end up trimming all of
16559 // the whitespace from a node, then we'll drop it from the list entirely.
16560 size_t write_index = 0;
16561
16562 pm_node_t *node;
16563 PM_NODE_LIST_FOREACH(nodes, read_index, node) {
16564 // We're not manipulating child nodes that aren't strings. In this case
16565 // we'll skip past it and indicate that the subsequent node should not
16566 // be dedented.
16567 if (!PM_NODE_TYPE_P(node, PM_STRING_NODE)) {
16568 nodes->nodes[write_index++] = node;
16569 dedent_next = false;
16570 continue;
16571 }
16572
16573 pm_string_node_t *string_node = ((pm_string_node_t *) node);
16574 if (dedent_next) {
16575 parse_heredoc_dedent_string(&string_node->unescaped, common_whitespace);
16576 }
16577
16578 if (string_node->unescaped.length == 0) {
16579 pm_node_destroy(parser, node);
16580 } else {
16581 nodes->nodes[write_index++] = node;
16582 }
16583
16584 // We always dedent the next node if it follows a string node.
16585 dedent_next = true;
16586 }
16587
16588 nodes->size = write_index;
16589}
16590
16594static pm_token_t
16595parse_strings_empty_content(const uint8_t *location) {
16596 return (pm_token_t) { .type = PM_TOKEN_STRING_CONTENT, .start = location, .end = location };
16597}
16598
16602static inline pm_node_t *
16603parse_strings(pm_parser_t *parser, pm_node_t *current, bool accepts_label, uint16_t depth) {
16604 assert(parser->current.type == PM_TOKEN_STRING_BEGIN);
16605 bool concating = false;
16606
16607 while (match1(parser, PM_TOKEN_STRING_BEGIN)) {
16608 pm_node_t *node = NULL;
16609
16610 // Here we have found a string literal. We'll parse it and add it to
16611 // the list of strings.
16612 const pm_lex_mode_t *lex_mode = parser->lex_modes.current;
16613 assert(lex_mode->mode == PM_LEX_STRING);
16614 bool lex_interpolation = lex_mode->as.string.interpolation;
16615 bool label_allowed = lex_mode->as.string.label_allowed && accepts_label;
16616
16617 pm_token_t opening = parser->current;
16618 parser_lex(parser);
16619
16620 if (match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
16621 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_EOF);
16622 // If we get here, then we have an end immediately after a
16623 // start. In that case we'll create an empty content token and
16624 // return an uninterpolated string.
16625 pm_token_t content = parse_strings_empty_content(parser->previous.start);
16626 pm_string_node_t *string = pm_string_node_create(parser, &opening, &content, &parser->previous);
16627
16628 pm_string_shared_init(&string->unescaped, content.start, content.end);
16629 node = (pm_node_t *) string;
16630 } else if (accept1(parser, PM_TOKEN_LABEL_END)) {
16631 // If we get here, then we have an end of a label immediately
16632 // after a start. In that case we'll create an empty symbol
16633 // node.
16634 pm_token_t content = parse_strings_empty_content(parser->previous.start);
16635 pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &content, &parser->previous);
16636
16637 pm_string_shared_init(&symbol->unescaped, content.start, content.end);
16638 node = (pm_node_t *) symbol;
16639
16640 if (!label_allowed) pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_LABEL);
16641 } else if (!lex_interpolation) {
16642 // If we don't accept interpolation then we expect the string to
16643 // start with a single string content node.
16644 pm_string_t unescaped;
16645 pm_token_t content;
16646
16647 if (match1(parser, PM_TOKEN_EOF)) {
16648 unescaped = PM_STRING_EMPTY;
16649 content = not_provided(parser);
16650 } else {
16651 unescaped = parser->current_string;
16652 expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_EXPECT_STRING_CONTENT);
16653 content = parser->previous;
16654 }
16655
16656 // It is unfortunately possible to have multiple string content
16657 // nodes in a row in the case that there's heredoc content in
16658 // the middle of the string, like this cursed example:
16659 //
16660 // <<-END+'b
16661 // a
16662 // END
16663 // c'+'d'
16664 //
16665 // In that case we need to switch to an interpolated string to
16666 // be able to contain all of the parts.
16667 if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
16668 pm_node_list_t parts = { 0 };
16669
16670 pm_token_t delimiters = not_provided(parser);
16671 pm_node_t *part = (pm_node_t *) pm_string_node_create_unescaped(parser, &delimiters, &content, &delimiters, &unescaped);
16672 pm_node_list_append(&parts, part);
16673
16674 do {
16675 part = (pm_node_t *) pm_string_node_create_current_string(parser, &delimiters, &parser->current, &delimiters);
16676 pm_node_list_append(&parts, part);
16677 parser_lex(parser);
16678 } while (match1(parser, PM_TOKEN_STRING_CONTENT));
16679
16680 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_EOF);
16681 node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
16682
16683 pm_node_list_free(&parts);
16684 } else if (accept1(parser, PM_TOKEN_LABEL_END)) {
16685 node = (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &content, &unescaped, true));
16686 if (!label_allowed) pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_LABEL);
16687 } else if (match1(parser, PM_TOKEN_EOF)) {
16688 pm_parser_err_token(parser, &opening, PM_ERR_STRING_LITERAL_EOF);
16689 node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped);
16690 } else if (accept1(parser, PM_TOKEN_STRING_END)) {
16691 node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
16692 } else {
16693 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, PM_ERR_STRING_LITERAL_TERM, pm_token_type_human(parser->previous.type));
16694 parser->previous.start = parser->previous.end;
16695 parser->previous.type = PM_TOKEN_MISSING;
16696 node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
16697 }
16698 } else if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
16699 // In this case we've hit string content so we know the string
16700 // at least has something in it. We'll need to check if the
16701 // following token is the end (in which case we can return a
16702 // plain string) or if it's not then it has interpolation.
16703 pm_token_t content = parser->current;
16704 pm_string_t unescaped = parser->current_string;
16705 parser_lex(parser);
16706
16707 if (match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
16708 node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped);
16709 pm_node_flag_set(node, parse_unescaped_encoding(parser));
16710
16711 // Kind of odd behavior, but basically if we have an
16712 // unterminated string and it ends in a newline, we back up one
16713 // character so that the error message is on the last line of
16714 // content in the string.
16715 if (!accept1(parser, PM_TOKEN_STRING_END)) {
16716 const uint8_t *location = parser->previous.end;
16717 if (location > parser->start && location[-1] == '\n') location--;
16718 pm_parser_err(parser, location, location, PM_ERR_STRING_LITERAL_EOF);
16719
16720 parser->previous.start = parser->previous.end;
16721 parser->previous.type = PM_TOKEN_MISSING;
16722 }
16723 } else if (accept1(parser, PM_TOKEN_LABEL_END)) {
16724 node = (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &content, &unescaped, true));
16725 if (!label_allowed) pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_LABEL);
16726 } else {
16727 // If we get here, then we have interpolation so we'll need
16728 // to create a string or symbol node with interpolation.
16729 pm_node_list_t parts = { 0 };
16730 pm_token_t string_opening = not_provided(parser);
16731 pm_token_t string_closing = not_provided(parser);
16732
16733 pm_node_t *part = (pm_node_t *) pm_string_node_create_unescaped(parser, &string_opening, &parser->previous, &string_closing, &unescaped);
16734 pm_node_flag_set(part, parse_unescaped_encoding(parser));
16735 pm_node_list_append(&parts, part);
16736
16737 while (!match3(parser, PM_TOKEN_STRING_END, PM_TOKEN_LABEL_END, PM_TOKEN_EOF)) {
16738 if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) {
16739 pm_node_list_append(&parts, part);
16740 }
16741 }
16742
16743 if (accept1(parser, PM_TOKEN_LABEL_END)) {
16744 node = (pm_node_t *) pm_interpolated_symbol_node_create(parser, &opening, &parts, &parser->previous);
16745 if (!label_allowed) pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_LABEL);
16746 } else if (match1(parser, PM_TOKEN_EOF)) {
16747 pm_parser_err_token(parser, &opening, PM_ERR_STRING_INTERPOLATED_TERM);
16748 node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->current);
16749 } else {
16750 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_INTERPOLATED_TERM);
16751 node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
16752 }
16753
16754 pm_node_list_free(&parts);
16755 }
16756 } else {
16757 // If we get here, then the first part of the string is not plain
16758 // string content, in which case we need to parse the string as an
16759 // interpolated string.
16760 pm_node_list_t parts = { 0 };
16761 pm_node_t *part;
16762
16763 while (!match3(parser, PM_TOKEN_STRING_END, PM_TOKEN_LABEL_END, PM_TOKEN_EOF)) {
16764 if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) {
16765 pm_node_list_append(&parts, part);
16766 }
16767 }
16768
16769 if (accept1(parser, PM_TOKEN_LABEL_END)) {
16770 node = (pm_node_t *) pm_interpolated_symbol_node_create(parser, &opening, &parts, &parser->previous);
16771 if (!label_allowed) pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_LABEL);
16772 } else if (match1(parser, PM_TOKEN_EOF)) {
16773 pm_parser_err_token(parser, &opening, PM_ERR_STRING_INTERPOLATED_TERM);
16774 node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->current);
16775 } else {
16776 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_INTERPOLATED_TERM);
16777 node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
16778 }
16779
16780 pm_node_list_free(&parts);
16781 }
16782
16783 if (current == NULL) {
16784 // If the node we just parsed is a symbol node, then we can't
16785 // concatenate it with anything else, so we can now return that
16786 // node.
16788 return node;
16789 }
16790
16791 // If we don't already have a node, then it's fine and we can just
16792 // set the result to be the node we just parsed.
16793 current = node;
16794 } else {
16795 // Otherwise we need to check the type of the node we just parsed.
16796 // If it cannot be concatenated with the previous node, then we'll
16797 // need to add a syntax error.
16799 pm_parser_err_node(parser, node, PM_ERR_STRING_CONCATENATION);
16800 }
16801
16802 // If we haven't already created our container for concatenation,
16803 // we'll do that now.
16804 if (!concating) {
16805 concating = true;
16806 pm_token_t bounds = not_provided(parser);
16807
16808 pm_interpolated_string_node_t *container = pm_interpolated_string_node_create(parser, &bounds, NULL, &bounds);
16809 pm_interpolated_string_node_append(container, current);
16810 current = (pm_node_t *) container;
16811 }
16812
16813 pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, node);
16814 }
16815 }
16816
16817 return current;
16818}
16819
// Values passed as the `flags` argument of parse_pattern(). Callers in this
// file combine them with `|` and parse_pattern() tests them with `&`.
// PM_PARSE_PATTERN_SINGLE sets no bits.
16820#define PM_PARSE_PATTERN_SINGLE 0
// When this bit is absent, a hash pattern that starts directly with a label,
// `**`, or a dynamic label is reported with PM_ERR_PATTERN_HASH_IMPLICIT.
16821#define PM_PARSE_PATTERN_TOP 1
// When this bit is present, a comma after the first pattern starts a
// comma-separated list of patterns.
16822#define PM_PARSE_PATTERN_MULTI 2
16823
// Forward declaration: the helpers that follow call parse_pattern() before
// its definition appears.
16824static pm_node_t *
16825parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, uint8_t flags, pm_diagnostic_id_t diag_id, uint16_t depth);
16826
16832static void
16833parse_pattern_capture(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_constant_id_t capture, const pm_location_t *location) {
16834 // Skip this capture if it starts with an underscore.
16835 if (*location->start == '_') return;
16836
16837 if (pm_constant_id_list_includes(captures, capture)) {
16838 pm_parser_err(parser, location->start, location->end, PM_ERR_PATTERN_CAPTURE_DUPLICATE);
16839 } else {
16840 pm_constant_id_list_append(captures, capture);
16841 }
16842}
16843
16847static pm_node_t *
16848parse_pattern_constant_path(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node_t *node, uint16_t depth) {
16849 // Now, if there are any :: operators that follow, parse them as constant
16850 // path nodes.
16851 while (accept1(parser, PM_TOKEN_COLON_COLON)) {
16852 pm_token_t delimiter = parser->previous;
16853 expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
16854 node = (pm_node_t *) pm_constant_path_node_create(parser, node, &delimiter, &parser->previous);
16855 }
16856
16857 // If there is a [ or ( that follows, then this is part of a larger pattern
16858 // expression. We'll parse the inner pattern here, then modify the returned
16859 // inner pattern with our constant path attached.
16860 if (!match2(parser, PM_TOKEN_BRACKET_LEFT, PM_TOKEN_PARENTHESIS_LEFT)) {
16861 return node;
16862 }
16863
16864 pm_token_t opening;
16865 pm_token_t closing;
16866 pm_node_t *inner = NULL;
16867
16868 if (accept1(parser, PM_TOKEN_BRACKET_LEFT)) {
16869 opening = parser->previous;
16870 accept1(parser, PM_TOKEN_NEWLINE);
16871
16872 if (!accept1(parser, PM_TOKEN_BRACKET_RIGHT)) {
16873 inner = parse_pattern(parser, captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_BRACKET, (uint16_t) (depth + 1));
16874 accept1(parser, PM_TOKEN_NEWLINE);
16875 expect1(parser, PM_TOKEN_BRACKET_RIGHT, PM_ERR_PATTERN_TERM_BRACKET);
16876 }
16877
16878 closing = parser->previous;
16879 } else {
16880 parser_lex(parser);
16881 opening = parser->previous;
16882 accept1(parser, PM_TOKEN_NEWLINE);
16883
16884 if (!accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
16885 inner = parse_pattern(parser, captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_PAREN, (uint16_t) (depth + 1));
16886 accept1(parser, PM_TOKEN_NEWLINE);
16887 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_PATTERN_TERM_PAREN);
16888 }
16889
16890 closing = parser->previous;
16891 }
16892
16893 if (!inner) {
16894 // If there was no inner pattern, then we have something like Foo() or
16895 // Foo[]. In that case we'll create an array pattern with no requireds.
16896 return (pm_node_t *) pm_array_pattern_node_constant_create(parser, node, &opening, &closing);
16897 }
16898
16899 // Now that we have the inner pattern, check to see if it's an array, find,
16900 // or hash pattern. If it is, then we'll attach our constant path to it if
16901 // it doesn't already have a constant. If it's not one of those node types
16902 // or it does have a constant, then we'll create an array pattern.
16903 switch (PM_NODE_TYPE(inner)) {
16904 case PM_ARRAY_PATTERN_NODE: {
16905 pm_array_pattern_node_t *pattern_node = (pm_array_pattern_node_t *) inner;
16906
16907 if (pattern_node->constant == NULL && pattern_node->opening_loc.start == NULL) {
16908 pattern_node->base.location.start = node->location.start;
16909 pattern_node->base.location.end = closing.end;
16910
16911 pattern_node->constant = node;
16912 pattern_node->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
16913 pattern_node->closing_loc = PM_LOCATION_TOKEN_VALUE(&closing);
16914
16915 return (pm_node_t *) pattern_node;
16916 }
16917
16918 break;
16919 }
16920 case PM_FIND_PATTERN_NODE: {
16921 pm_find_pattern_node_t *pattern_node = (pm_find_pattern_node_t *) inner;
16922
16923 if (pattern_node->constant == NULL && pattern_node->opening_loc.start == NULL) {
16924 pattern_node->base.location.start = node->location.start;
16925 pattern_node->base.location.end = closing.end;
16926
16927 pattern_node->constant = node;
16928 pattern_node->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
16929 pattern_node->closing_loc = PM_LOCATION_TOKEN_VALUE(&closing);
16930
16931 return (pm_node_t *) pattern_node;
16932 }
16933
16934 break;
16935 }
16936 case PM_HASH_PATTERN_NODE: {
16937 pm_hash_pattern_node_t *pattern_node = (pm_hash_pattern_node_t *) inner;
16938
16939 if (pattern_node->constant == NULL && pattern_node->opening_loc.start == NULL) {
16940 pattern_node->base.location.start = node->location.start;
16941 pattern_node->base.location.end = closing.end;
16942
16943 pattern_node->constant = node;
16944 pattern_node->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
16945 pattern_node->closing_loc = PM_LOCATION_TOKEN_VALUE(&closing);
16946
16947 return (pm_node_t *) pattern_node;
16948 }
16949
16950 break;
16951 }
16952 default:
16953 break;
16954 }
16955
16956 // If we got here, then we didn't return one of the inner patterns by
16957 // attaching its constant. In this case we'll create an array pattern and
16958 // attach our constant to it.
16959 pm_array_pattern_node_t *pattern_node = pm_array_pattern_node_constant_create(parser, node, &opening, &closing);
16960 pm_array_pattern_node_requireds_append(pattern_node, inner);
16961 return (pm_node_t *) pattern_node;
16962}
16963
16967static pm_splat_node_t *
16968parse_pattern_rest(pm_parser_t *parser, pm_constant_id_list_t *captures) {
16969 assert(parser->previous.type == PM_TOKEN_USTAR);
16970 pm_token_t operator = parser->previous;
16971 pm_node_t *name = NULL;
16972
16973 // Rest patterns don't necessarily have a name associated with them. So we
16974 // will check for that here. If they do, then we'll add it to the local
16975 // table since this pattern will cause it to become a local variable.
16976 if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
16977 pm_token_t identifier = parser->previous;
16978 pm_constant_id_t constant_id = pm_parser_constant_id_token(parser, &identifier);
16979
16980 int depth;
16981 if ((depth = pm_parser_local_depth_constant_id(parser, constant_id)) == -1) {
16982 pm_parser_local_add(parser, constant_id, identifier.start, identifier.end, 0);
16983 }
16984
16985 parse_pattern_capture(parser, captures, constant_id, &PM_LOCATION_TOKEN_VALUE(&identifier));
16986 name = (pm_node_t *) pm_local_variable_target_node_create(
16987 parser,
16988 &PM_LOCATION_TOKEN_VALUE(&identifier),
16989 constant_id,
16990 (uint32_t) (depth == -1 ? 0 : depth)
16991 );
16992 }
16993
16994 // Finally we can return the created node.
16995 return pm_splat_node_create(parser, &operator, name);
16996}
16997
17001static pm_node_t *
17002parse_pattern_keyword_rest(pm_parser_t *parser, pm_constant_id_list_t *captures) {
17003 assert(parser->current.type == PM_TOKEN_USTAR_STAR);
17004 parser_lex(parser);
17005
17006 pm_token_t operator = parser->previous;
17007 pm_node_t *value = NULL;
17008
17009 if (accept1(parser, PM_TOKEN_KEYWORD_NIL)) {
17010 return (pm_node_t *) pm_no_keywords_parameter_node_create(parser, &operator, &parser->previous);
17011 }
17012
17013 if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
17014 pm_constant_id_t constant_id = pm_parser_constant_id_token(parser, &parser->previous);
17015
17016 int depth;
17017 if ((depth = pm_parser_local_depth_constant_id(parser, constant_id)) == -1) {
17018 pm_parser_local_add(parser, constant_id, parser->previous.start, parser->previous.end, 0);
17019 }
17020
17021 parse_pattern_capture(parser, captures, constant_id, &PM_LOCATION_TOKEN_VALUE(&parser->previous));
17022 value = (pm_node_t *) pm_local_variable_target_node_create(
17023 parser,
17024 &PM_LOCATION_TOKEN_VALUE(&parser->previous),
17025 constant_id,
17026 (uint32_t) (depth == -1 ? 0 : depth)
17027 );
17028 }
17029
17030 return (pm_node_t *) pm_assoc_splat_node_create(parser, value, &operator);
17031}
17032
17037static bool
17038pm_slice_is_valid_local(const pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
17039 ptrdiff_t length = end - start;
17040 if (length == 0) return false;
17041
17042 // First ensure that it starts with a valid identifier starting character.
17043 size_t width = char_is_identifier_start(parser, start);
17044 if (width == 0) return false;
17045
17046 // Next, ensure that it's not an uppercase character.
17047 if (parser->encoding_changed) {
17048 if (parser->encoding->isupper_char(start, length)) return false;
17049 } else {
17050 if (pm_encoding_utf_8_isupper_char(start, length)) return false;
17051 }
17052
17053 // Next, iterate through all of the bytes of the string to ensure that they
17054 // are all valid identifier characters.
17055 const uint8_t *cursor = start + width;
17056 while ((cursor < end) && (width = char_is_identifier(parser, cursor))) cursor += width;
17057 return cursor == end;
17058}
17059
17064static pm_node_t *
17065parse_pattern_hash_implicit_value(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_symbol_node_t *key) {
17066 const pm_location_t *value_loc = &((pm_symbol_node_t *) key)->value_loc;
17067
17068 pm_constant_id_t constant_id = pm_parser_constant_id_location(parser, value_loc->start, value_loc->end);
17069 int depth = -1;
17070
17071 if (pm_slice_is_valid_local(parser, value_loc->start, value_loc->end)) {
17072 depth = pm_parser_local_depth_constant_id(parser, constant_id);
17073 } else {
17074 pm_parser_err(parser, key->base.location.start, key->base.location.end, PM_ERR_PATTERN_HASH_KEY_LOCALS);
17075
17076 if ((value_loc->end > value_loc->start) && ((value_loc->end[-1] == '!') || (value_loc->end[-1] == '?'))) {
17077 PM_PARSER_ERR_LOCATION_FORMAT(parser, value_loc, PM_ERR_INVALID_LOCAL_VARIABLE_WRITE, (int) (value_loc->end - value_loc->start), (const char *) value_loc->start);
17078 }
17079 }
17080
17081 if (depth == -1) {
17082 pm_parser_local_add(parser, constant_id, value_loc->start, value_loc->end, 0);
17083 }
17084
17085 parse_pattern_capture(parser, captures, constant_id, value_loc);
17086 pm_local_variable_target_node_t *target = pm_local_variable_target_node_create(
17087 parser,
17088 value_loc,
17089 constant_id,
17090 (uint32_t) (depth == -1 ? 0 : depth)
17091 );
17092
17093 return (pm_node_t *) pm_implicit_node_create(parser, (pm_node_t *) target);
17094}
17095
17100static void
17101parse_pattern_hash_key(pm_parser_t *parser, pm_static_literals_t *keys, pm_node_t *node) {
17102 if (pm_static_literals_add(&parser->newline_list, parser->start_line, keys, node, true) != NULL) {
17103 pm_parser_err_node(parser, node, PM_ERR_PATTERN_HASH_KEY_DUPLICATE);
17104 }
17105}
17106
// Parse the entries of a hash pattern, given its already-parsed first node.
// Builds a list of assoc nodes plus an optional rest node, reports duplicate
// keys through parse_pattern_hash_key(), and returns the hash pattern node
// created from them. The temporary list storage and key set are released
// before returning.
// NOTE(review): the line holding this function's return type is absent from
// this listing; the value returned below is declared as
// pm_hash_pattern_node_t *.
17111parse_pattern_hash(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node_t *first_node, uint16_t depth) {
17112 pm_node_list_t assocs = { 0 };
17113 pm_static_literals_t keys = { 0 };
17114 pm_node_t *rest = NULL;
17115
17116 switch (PM_NODE_TYPE(first_node)) {
 // NOTE(review): the case label(s) for this arm are missing from this
 // listing (numbering skips 17117-17118). Presumably they are the node
 // types produced by parse_pattern_keyword_rest() - confirm upstream.
17119 rest = first_node;
17120 break;
17121 case PM_SYMBOL_NODE: {
17122 if (pm_symbol_node_label_p(first_node)) {
17123 parse_pattern_hash_key(parser, &keys, first_node);
17124 pm_node_t *value;
17125
 // NOTE(review): the `if (...) {` that opens this branch is missing
 // from this listing (numbering skips 17126); the condition that
 // selects the implicit value cannot be seen here.
17127 // Otherwise, we will create an implicit local variable
17128 // target for the value.
17129 value = parse_pattern_hash_implicit_value(parser, captures, (pm_symbol_node_t *) first_node);
17130 } else {
17131 // Here we have a value for the first assoc in the list, so
17132 // we will parse it now.
17133 value = parse_pattern(parser, captures, PM_PARSE_PATTERN_SINGLE, PM_ERR_PATTERN_EXPRESSION_AFTER_KEY, (uint16_t) (depth + 1));
17134 }
17135
 // The assoc has no operator token of its own, so a not-provided token
 // is passed.
17136 pm_token_t operator = not_provided(parser);
17137 pm_node_t *assoc = (pm_node_t *) pm_assoc_node_create(parser, first_node, &operator, value);
17138
17139 pm_node_list_append(&assocs, assoc);
17140 break;
17141 }
17142 }
 // A symbol node that is not a label falls into the error handling below.
17143 /* fallthrough */
17144 default: {
17145 // If we get anything else, then this is an error. For this we'll
17146 // create a missing node for the value and create an assoc node for
17147 // the first node in the list.
17148 pm_diagnostic_id_t diag_id = PM_NODE_TYPE_P(first_node, PM_INTERPOLATED_SYMBOL_NODE) ? PM_ERR_PATTERN_HASH_KEY_INTERPOLATED : PM_ERR_PATTERN_HASH_KEY_LABEL;
17149 pm_parser_err_node(parser, first_node, diag_id);
17150
17151 pm_token_t operator = not_provided(parser);
17152 pm_node_t *value = (pm_node_t *) pm_missing_node_create(parser, first_node->location.start, first_node->location.end);
17153 pm_node_t *assoc = (pm_node_t *) pm_assoc_node_create(parser, first_node, &operator, value);
17154
17155 pm_node_list_append(&assocs, assoc);
17156 break;
17157 }
17158 }
17159
17160 // If there are any other assocs, then we'll parse them now.
17161 while (accept1(parser, PM_TOKEN_COMMA)) {
17162 // Here we need to break to support trailing commas.
 // NOTE(review): the `if (...) {` that guards this trailing-comma exit is
 // missing from this listing (numbering skips 17163).
17164 // Trailing commas are not allowed to follow a rest pattern.
17165 if (rest != NULL) {
17166 pm_parser_err_token(parser, &parser->current, PM_ERR_PATTERN_EXPRESSION_AFTER_REST);
17167 }
17168
17169 break;
17170 }
17171
17172 if (match1(parser, PM_TOKEN_USTAR_STAR)) {
17173 pm_node_t *assoc = parse_pattern_keyword_rest(parser, captures);
17174
 // Only the first keyword rest becomes `rest`; a later one is reported
 // as an error and kept in the assoc list.
17175 if (rest == NULL) {
17176 rest = assoc;
17177 } else {
17178 pm_parser_err_node(parser, assoc, PM_ERR_PATTERN_EXPRESSION_AFTER_REST);
17179 pm_node_list_append(&assocs, assoc);
17180 }
17181 } else {
17182 pm_node_t *key;
17183
17184 if (match1(parser, PM_TOKEN_STRING_BEGIN)) {
17185 key = parse_strings(parser, NULL, true, (uint16_t) (depth + 1));
17186
 // NOTE(review): the `if (...) {` that opens this error branch is missing
 // from this listing (numbering skips 17187).
17188 pm_parser_err_node(parser, key, PM_ERR_PATTERN_HASH_KEY_INTERPOLATED);
17189 } else if (!pm_symbol_node_label_p(key)) {
17190 pm_parser_err_node(parser, key, PM_ERR_PATTERN_LABEL_AFTER_COMMA);
17191 }
17192 } else {
17193 expect1(parser, PM_TOKEN_LABEL, PM_ERR_PATTERN_LABEL_AFTER_COMMA);
17194 key = (pm_node_t *) pm_symbol_node_label_create(parser, &parser->previous);
17195 }
17196
17197 parse_pattern_hash_key(parser, &keys, key);
17198 pm_node_t *value = NULL;
17199
 // NOTE(review): the `if (...) {` that selects the implicit value is missing
 // from this listing (numbering skips 17200).
17201 value = parse_pattern_hash_implicit_value(parser, captures, (pm_symbol_node_t *) key);
17202 } else {
17203 value = parse_pattern(parser, captures, PM_PARSE_PATTERN_SINGLE, PM_ERR_PATTERN_EXPRESSION_AFTER_KEY, (uint16_t) (depth + 1));
17204 }
17205
17206 pm_token_t operator = not_provided(parser);
17207 pm_node_t *assoc = (pm_node_t *) pm_assoc_node_create(parser, key, &operator, value);
17208
 // Any key/value pair that appears after a rest pattern is an error, but
 // it is still appended so the tree keeps it.
17209 if (rest != NULL) {
17210 pm_parser_err_node(parser, assoc, PM_ERR_PATTERN_EXPRESSION_AFTER_REST);
17211 }
17212
17213 pm_node_list_append(&assocs, assoc);
17214 }
17215 }
17216
 // Build the node from the list, then free the list's own backing array.
17217 pm_hash_pattern_node_t *node = pm_hash_pattern_node_node_list_create(parser, &assocs, rest);
17218 xfree(assocs.nodes);
17219
17220 pm_static_literals_free(&keys);
17221 return node;
17222}
17223
// Parse one primitive pattern, dispatching on the current token type: a local
// variable target, a bracket-delimited array/find pattern, a brace-delimited
// hash pattern, a range, a pinned variable or expression (`^`), or a constant
// path. Any other token is reported with diag_id and yields a missing node.
17227static pm_node_t *
17228parse_pattern_primitive(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_diagnostic_id_t diag_id, uint16_t depth) {
17229 switch (parser->current.type) {
 // NOTE(review): the embedded numbering skips 17230 here, so another case
 // label sharing this arm may be absent from this listing.
17231 case PM_TOKEN_METHOD_NAME: {
17232 parser_lex(parser);
17233 pm_constant_id_t constant_id = pm_parser_constant_id_token(parser, &parser->previous);
17234
 // This `depth` shadows the function parameter: here it is the result of
 // the local lookup, where -1 leads to the local being added.
17235 int depth;
17236 if ((depth = pm_parser_local_depth_constant_id(parser, constant_id)) == -1) {
17237 pm_parser_local_add(parser, constant_id, parser->previous.start, parser->previous.end, 0);
17238 }
17239
17240 parse_pattern_capture(parser, captures, constant_id, &PM_LOCATION_TOKEN_VALUE(&parser->previous));
17241 return (pm_node_t *) pm_local_variable_target_node_create(
17242 parser,
17243 &PM_LOCATION_TOKEN_VALUE(&parser->previous),
17244 constant_id,
17245 (uint32_t) (depth == -1 ? 0 : depth)
17246 );
17247 }
 // NOTE(review): the `case ...: {` label that opens this arm is missing from
 // this listing (numbering skips 17248). The arm parses a pattern that is
 // closed by PM_TOKEN_BRACKET_RIGHT.
17249 pm_token_t opening = parser->current;
17250 parser_lex(parser);
17251
17252 if (accept1(parser, PM_TOKEN_BRACKET_RIGHT)) {
17253 // If we have an empty array pattern, then we'll just return a new
17254 // array pattern node.
17255 return (pm_node_t *) pm_array_pattern_node_empty_create(parser, &opening, &parser->previous);
17256 }
17257
17258 // Otherwise, we'll parse the inner pattern, then deal with it depending
17259 // on the type it returns.
17260 pm_node_t *inner = parse_pattern(parser, captures, PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_BRACKET, (uint16_t) (depth + 1));
17261
17262 accept1(parser, PM_TOKEN_NEWLINE);
17263 expect1(parser, PM_TOKEN_BRACKET_RIGHT, PM_ERR_PATTERN_TERM_BRACKET);
17264 pm_token_t closing = parser->previous;
17265
 // An inner array or find pattern that has no opening location yet is
 // reused: the brackets and overall location are written onto it instead
 // of wrapping it in another node.
17266 switch (PM_NODE_TYPE(inner)) {
17267 case PM_ARRAY_PATTERN_NODE: {
17268 pm_array_pattern_node_t *pattern_node = (pm_array_pattern_node_t *) inner;
17269 if (pattern_node->opening_loc.start == NULL) {
17270 pattern_node->base.location.start = opening.start;
17271 pattern_node->base.location.end = closing.end;
17272
17273 pattern_node->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
17274 pattern_node->closing_loc = PM_LOCATION_TOKEN_VALUE(&closing);
17275
17276 return (pm_node_t *) pattern_node;
17277 }
17278
17279 break;
17280 }
17281 case PM_FIND_PATTERN_NODE: {
17282 pm_find_pattern_node_t *pattern_node = (pm_find_pattern_node_t *) inner;
17283 if (pattern_node->opening_loc.start == NULL) {
17284 pattern_node->base.location.start = opening.start;
17285 pattern_node->base.location.end = closing.end;
17286
17287 pattern_node->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
17288 pattern_node->closing_loc = PM_LOCATION_TOKEN_VALUE(&closing);
17289
17290 return (pm_node_t *) pattern_node;
17291 }
17292
17293 break;
17294 }
17295 default:
17296 break;
17297 }
17298
 // Anything else becomes the single required element of a new array
 // pattern delimited by the brackets.
17299 pm_array_pattern_node_t *node = pm_array_pattern_node_empty_create(parser, &opening, &closing);
17300 pm_array_pattern_node_requireds_append(node, inner);
17301 return (pm_node_t *) node;
17302 }
17303 case PM_TOKEN_BRACE_LEFT: {
 // pattern_matching_newlines is switched off while inside the braces and
 // restored before returning.
17304 bool previous_pattern_matching_newlines = parser->pattern_matching_newlines;
17305 parser->pattern_matching_newlines = false;
17306
 // NOTE(review): the declaration of `node` used below is missing from this
 // listing (numbering skips 17307).
17308 pm_token_t opening = parser->current;
17309 parser_lex(parser);
17310
17311 if (accept1(parser, PM_TOKEN_BRACE_RIGHT)) {
17312 // If we have an empty hash pattern, then we'll just return a new hash
17313 // pattern node.
17314 node = pm_hash_pattern_node_empty_create(parser, &opening, &parser->previous);
17315 } else {
17316 pm_node_t *first_node;
17317
17318 switch (parser->current.type) {
17319 case PM_TOKEN_LABEL:
17320 parser_lex(parser);
17321 first_node = (pm_node_t *) pm_symbol_node_label_create(parser, &parser->previous);
17322 break;
 // NOTE(review): a case label is missing from this listing here (numbering
 // skips 17323). parse_pattern_keyword_rest() asserts that the current
 // token is PM_TOKEN_USTAR_STAR.
17324 first_node = parse_pattern_keyword_rest(parser, captures);
17325 break;
 // NOTE(review): a case label is missing from this listing here (numbering
 // skips 17326); the token type it matches cannot be seen.
17327 first_node = parse_expression(parser, PM_BINDING_POWER_MAX, false, true, PM_ERR_PATTERN_HASH_KEY_LABEL, (uint16_t) (depth + 1));
17328 break;
17329 default: {
 // Report the unexpected token and consume it; a missing node covering
 // it stands in for the key.
17330 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_PATTERN_HASH_KEY, pm_token_type_human(parser->current.type));
17331 parser_lex(parser);
17332
17333 first_node = (pm_node_t *) pm_missing_node_create(parser, parser->previous.start, parser->previous.end);
17334 break;
17335 }
17336 }
17337
17338 node = parse_pattern_hash(parser, captures, first_node, (uint16_t) (depth + 1));
17339
17340 accept1(parser, PM_TOKEN_NEWLINE);
17341 expect1(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_PATTERN_TERM_BRACE);
17342 pm_token_t closing = parser->previous;
17343
 // Stretch the hash pattern to cover the braces and record them.
17344 node->base.location.start = opening.start;
17345 node->base.location.end = closing.end;
17346
17347 node->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
17348 node->closing_loc = PM_LOCATION_TOKEN_VALUE(&closing);
17349 }
17350
17351 parser->pattern_matching_newlines = previous_pattern_matching_newlines;
17352 return (pm_node_t *) node;
17353 }
17354 case PM_TOKEN_UDOT_DOT:
17355 case PM_TOKEN_UDOT_DOT_DOT: {
17356 pm_token_t operator = parser->current;
17357 parser_lex(parser);
17358
17359 // Since we have a unary range operator, we need to parse the subsequent
17360 // expression as the right side of the range.
17361 switch (parser->current.type) {
17362 case PM_CASE_PRIMITIVE: {
17363 pm_node_t *right = parse_expression(parser, PM_BINDING_POWER_MAX, false, false, PM_ERR_PATTERN_EXPRESSION_AFTER_RANGE, (uint16_t) (depth + 1));
17364 return (pm_node_t *) pm_range_node_create(parser, NULL, &operator, right);
17365 }
17366 default: {
 // No usable right-hand side: report on the operator and use a missing
 // node located at the operator as the range's right side.
17367 pm_parser_err_token(parser, &operator, PM_ERR_PATTERN_EXPRESSION_AFTER_RANGE);
17368 pm_node_t *right = (pm_node_t *) pm_missing_node_create(parser, operator.start, operator.end);
17369 return (pm_node_t *) pm_range_node_create(parser, NULL, &operator, right);
17370 }
17371 }
17372 }
17373 case PM_CASE_PRIMITIVE: {
17374 pm_node_t *node = parse_expression(parser, PM_BINDING_POWER_MAX, false, true, diag_id, (uint16_t) (depth + 1));
17375
17376 // If we found a label, we need to immediately return to the caller.
17377 if (pm_symbol_node_label_p(node)) return node;
17378
17379 // Now that we have a primitive, we need to check if it's part of a range.
17380 if (accept2(parser, PM_TOKEN_DOT_DOT, PM_TOKEN_DOT_DOT_DOT)) {
17381 pm_token_t operator = parser->previous;
17382
17383 // Now that we have the operator, we need to check if this is followed
17384 // by another expression. If it is, then we will create a full range
17385 // node. Otherwise, we'll create an endless range.
17386 switch (parser->current.type) {
17387 case PM_CASE_PRIMITIVE: {
17388 pm_node_t *right = parse_expression(parser, PM_BINDING_POWER_MAX, false, false, PM_ERR_PATTERN_EXPRESSION_AFTER_RANGE, (uint16_t) (depth + 1));
17389 return (pm_node_t *) pm_range_node_create(parser, node, &operator, right);
17390 }
17391 default:
17392 return (pm_node_t *) pm_range_node_create(parser, node, &operator, NULL);
17393 }
17394 }
17395
17396 return node;
17397 }
17398 case PM_TOKEN_CARET: {
17399 parser_lex(parser);
17400 pm_token_t operator = parser->previous;
17401
17402 // At this point we have a pin operator. We need to check the subsequent
17403 // expression to determine if it's a variable or an expression.
17404 switch (parser->current.type) {
17405 case PM_TOKEN_IDENTIFIER: {
17406 parser_lex(parser);
17407 pm_node_t *variable = (pm_node_t *) parse_variable(parser);
17408
 // When parse_variable() yields nothing, report the missing local and
 // pin a placeholder local variable read instead.
17409 if (variable == NULL) {
17410 PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, parser->previous, PM_ERR_NO_LOCAL_VARIABLE);
17411 variable = (pm_node_t *) pm_local_variable_read_node_missing_create(parser, &parser->previous, 0);
17412 }
17413
17414 return (pm_node_t *) pm_pinned_variable_node_create(parser, &operator, variable);
17415 }
 // NOTE(review): the `case ...: {` label for this arm is missing from this
 // listing (numbering skips 17416); the arm pins an instance variable read.
17417 parser_lex(parser);
17418 pm_node_t *variable = (pm_node_t *) pm_instance_variable_read_node_create(parser, &parser->previous);
17419
17420 return (pm_node_t *) pm_pinned_variable_node_create(parser, &operator, variable);
17421 }
 // NOTE(review): the `case ...: {` label for this arm is missing from this
 // listing (numbering skips 17422); the arm pins a class variable read.
17423 parser_lex(parser);
17424 pm_node_t *variable = (pm_node_t *) pm_class_variable_read_node_create(parser, &parser->previous);
17425
17426 return (pm_node_t *) pm_pinned_variable_node_create(parser, &operator, variable);
17427 }
 // NOTE(review): the `case ...: {` label for this arm is missing from this
 // listing (numbering skips 17428); the arm pins a global variable read.
17429 parser_lex(parser);
17430 pm_node_t *variable = (pm_node_t *) pm_global_variable_read_node_create(parser, &parser->previous);
17431
17432 return (pm_node_t *) pm_pinned_variable_node_create(parser, &operator, variable);
17433 }
 // NOTE(review): the `case ...: {` label for this arm is missing from this
 // listing (numbering skips 17434); the arm pins a numbered reference read.
17435 parser_lex(parser);
17436 pm_node_t *variable = (pm_node_t *) pm_numbered_reference_read_node_create(parser, &parser->previous);
17437
17438 return (pm_node_t *) pm_pinned_variable_node_create(parser, &operator, variable);
17439 }
 // NOTE(review): the `case ...: {` label for this arm is missing from this
 // listing (numbering skips 17440); the arm pins a back reference read.
17441 parser_lex(parser);
17442 pm_node_t *variable = (pm_node_t *) pm_back_reference_read_node_create(parser, &parser->previous);
17443
17444 return (pm_node_t *) pm_pinned_variable_node_create(parser, &operator, variable);
17445 }
 // NOTE(review): the `case ...: {` label for this arm is missing from this
 // listing (numbering skips 17446); the arm parses a pinned expression that
 // is closed by PM_TOKEN_PARENTHESIS_RIGHT.
17447 bool previous_pattern_matching_newlines = parser->pattern_matching_newlines;
17448 parser->pattern_matching_newlines = false;
17449
17450 pm_token_t lparen = parser->current;
17451 parser_lex(parser);
17452
17453 pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_STATEMENT, true, false, PM_ERR_PATTERN_EXPRESSION_AFTER_PIN, (uint16_t) (depth + 1));
 // The newline setting is restored as soon as the expression is parsed,
 // before the closing parenthesis is consumed.
17454 parser->pattern_matching_newlines = previous_pattern_matching_newlines;
17455
17456 accept1(parser, PM_TOKEN_NEWLINE);
17457 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_PATTERN_TERM_PAREN);
17458 return (pm_node_t *) pm_pinned_expression_node_create(parser, expression, &operator, &lparen, &parser->previous);
17459 }
17460 default: {
17461 // If we get here, then we have a pin operator followed by something
17462 // not understood. We'll create a missing node and return that.
17463 pm_parser_err_token(parser, &operator, PM_ERR_PATTERN_EXPRESSION_AFTER_PIN);
17464 pm_node_t *variable = (pm_node_t *) pm_missing_node_create(parser, operator.start, operator.end);
17465 return (pm_node_t *) pm_pinned_variable_node_create(parser, &operator, variable);
17466 }
17467 }
17468 }
17469 case PM_TOKEN_UCOLON_COLON: {
 // A leading `::` builds a constant path with no parent (NULL) before
 // handing over to parse_pattern_constant_path().
17470 pm_token_t delimiter = parser->current;
17471 parser_lex(parser);
17472
17473 expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
17474 pm_constant_path_node_t *node = pm_constant_path_node_create(parser, NULL, &delimiter, &parser->previous);
17475
17476 return parse_pattern_constant_path(parser, captures, (pm_node_t *) node, (uint16_t) (depth + 1));
17477 }
17478 case PM_TOKEN_CONSTANT: {
17479 pm_token_t constant = parser->current;
17480 parser_lex(parser);
17481
17482 pm_node_t *node = (pm_node_t *) pm_constant_read_node_create(parser, &constant);
17483 return parse_pattern_constant_path(parser, captures, node, (uint16_t) (depth + 1));
17484 }
17485 default:
 // The offending token is not consumed here; the missing node spans it.
17486 pm_parser_err_current(parser, diag_id);
17487 return (pm_node_t *) pm_missing_node_create(parser, parser->current.start, parser->current.end);
17488 }
17489}
17490
// Parse a chain of primitive patterns joined by `|`, followed by any number of
// `=> identifier` captures. When first_node is non-NULL it is used as the
// first alternative and parsing resumes at the next `|`; when it is NULL the
// first alternative is parsed here. Returns the resulting node.
17495static pm_node_t *
17496parse_pattern_primitives(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node_t *first_node, pm_diagnostic_id_t diag_id, uint16_t depth) {
17497 pm_node_t *node = first_node;
17498
 // The `node == NULL` test short-circuits, so no `|` is required (or
 // consumed) before the very first alternative.
17499 while ((node == NULL) || accept1(parser, PM_TOKEN_PIPE)) {
 // Only used when building an alternation, i.e. after a `|` was accepted.
17500 pm_token_t operator = parser->previous;
17501
17502 switch (parser->current.type) {
 // NOTE(review): the embedded numbering skips 17503-17505, 17508 and 17510
 // in this label list, so further case labels sharing this arm appear to be
 // absent from this listing.
17506 case PM_TOKEN_CARET:
17507 case PM_TOKEN_CONSTANT:
17509 case PM_TOKEN_UDOT_DOT:
17511 case PM_CASE_PRIMITIVE: {
17512 if (node == NULL) {
17513 node = parse_pattern_primitive(parser, captures, diag_id, (uint16_t) (depth + 1));
17514 } else {
17515 pm_node_t *right = parse_pattern_primitive(parser, captures, PM_ERR_PATTERN_EXPRESSION_AFTER_PIPE, (uint16_t) (depth + 1));
17516 node = (pm_node_t *) pm_alternation_pattern_node_create(parser, node, right, &operator);
17517 }
17518
17519 break;
17520 }
 // NOTE(review): the `case ...: {` label(s) for this arm are missing from
 // this listing (numbering skips 17521-17522). The arm parses a nested
 // pattern that is closed by PM_TOKEN_PARENTHESIS_RIGHT.
17523 pm_token_t opening = parser->current;
17524 parser_lex(parser);
17525
17526 pm_node_t *body = parse_pattern(parser, captures, PM_PARSE_PATTERN_SINGLE, PM_ERR_PATTERN_EXPRESSION_AFTER_PAREN, (uint16_t) (depth + 1));
17527 accept1(parser, PM_TOKEN_NEWLINE);
17528 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_PATTERN_TERM_PAREN);
17529 pm_node_t *right = (pm_node_t *) pm_parentheses_node_create(parser, &opening, body, &parser->previous);
17530
17531 if (node == NULL) {
17532 node = right;
17533 } else {
17534 node = (pm_node_t *) pm_alternation_pattern_node_create(parser, node, right, &operator);
17535 }
17536
17537 break;
17538 }
17539 default: {
 // Unrecognized token: report it and substitute a missing node. Because
 // `node` becomes non-NULL, the loop continues only if a `|` follows.
17540 pm_parser_err_current(parser, diag_id);
17541 pm_node_t *right = (pm_node_t *) pm_missing_node_create(parser, parser->current.start, parser->current.end);
17542
17543 if (node == NULL) {
17544 node = right;
17545 } else {
17546 node = (pm_node_t *) pm_alternation_pattern_node_create(parser, node, right, &operator);
17547 }
17548
17549 break;
17550 }
17551 }
17552 }
17553
17554 // If we have an =>, then we are assigning this pattern to a variable.
17555 // In this case we should create an assignment node.
17556 while (accept1(parser, PM_TOKEN_EQUAL_GREATER)) {
17557 pm_token_t operator = parser->previous;
17558 expect1(parser, PM_TOKEN_IDENTIFIER, PM_ERR_PATTERN_IDENT_AFTER_HROCKET);
17559
17560 pm_constant_id_t constant_id = pm_parser_constant_id_token(parser, &parser->previous);
 // This `depth` shadows the function parameter: here it is the result of
 // the local lookup, where -1 leads to the local being added.
17561 int depth;
17562
17563 if ((depth = pm_parser_local_depth_constant_id(parser, constant_id)) == -1) {
17564 pm_parser_local_add(parser, constant_id, parser->previous.start, parser->previous.end, 0);
17565 }
17566
17567 parse_pattern_capture(parser, captures, constant_id, &PM_LOCATION_TOKEN_VALUE(&parser->previous));
17568 pm_local_variable_target_node_t *target = pm_local_variable_target_node_create(
17569 parser,
17570 &PM_LOCATION_TOKEN_VALUE(&parser->previous),
17571 constant_id,
17572 (uint32_t) (depth == -1 ? 0 : depth)
17573 );
17574
 // Each capture wraps the pattern built so far, so repeated `=>` nest.
17575 node = (pm_node_t *) pm_capture_pattern_node_create(parser, node, target, &operator);
17576 }
17577
17578 return node;
17579}
17580
/**
 * Parse a pattern starting at the current token and return the resulting node.
 *
 * The current token type selects the entry path:
 *   - PM_TOKEN_LABEL and PM_TOKEN_USTAR_STAR build a hash pattern through
 *     parse_pattern_hash and return immediately.
 *   - PM_TOKEN_STRING_BEGIN parses one primitive first; if that turns out to
 *     be a label symbol it is used as the first key of a hash pattern,
 *     otherwise parsing continues through parse_pattern_primitives.
 *   - PM_TOKEN_USTAR is parsed as a leading rest pattern only when `flags`
 *     contains PM_PARSE_PATTERN_TOP or PM_PARSE_PATTERN_MULTI; otherwise it
 *     falls through to the default path.
 *   - Everything else goes through parse_pattern_primitives.
 *
 * Hash patterns built inside the switch are reported with
 * PM_ERR_PATTERN_HASH_IMPLICIT when PM_PARSE_PATTERN_TOP is not set. When
 * PM_PARSE_PATTERN_MULTI is set and a comma follows the first pattern, the
 * comma-separated patterns are collected into a list and turned into either a
 * find pattern or an array pattern.
 *
 * `captures` is handed to every helper parser, `diag_id` is forwarded to the
 * primitive parsers for the first pattern, and `depth + 1` is passed to every
 * nested parse call.
 */
17584static pm_node_t *
17585parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, uint8_t flags, pm_diagnostic_id_t diag_id, uint16_t depth) {
17586 pm_node_t *node = NULL;
17587
// Track whether a rest pattern was seen first and/or later in a
// comma-separated list; both feed the find/array decision further down.
17588 bool leading_rest = false;
17589 bool trailing_rest = false;
17590
17591 switch (parser->current.type) {
17592 case PM_TOKEN_LABEL: {
// Consume the label and use it as the first key of a hash pattern.
17593 parser_lex(parser);
17594 pm_node_t *key = (pm_node_t *) pm_symbol_node_label_create(parser, &parser->previous);
17595 node = (pm_node_t *) parse_pattern_hash(parser, captures, key, (uint16_t) (depth + 1));
17596
17597 if (!(flags & PM_PARSE_PATTERN_TOP)) {
17598 pm_parser_err_node(parser, node, PM_ERR_PATTERN_HASH_IMPLICIT);
17599 }
17600
17601 return node;
17602 }
17603 case PM_TOKEN_USTAR_STAR: {
// A keyword rest is parsed first and then handed to parse_pattern_hash
// as the first element of the hash pattern.
17604 node = parse_pattern_keyword_rest(parser, captures);
17605 node = (pm_node_t *) parse_pattern_hash(parser, captures, node, (uint16_t) (depth + 1));
17606
17607 if (!(flags & PM_PARSE_PATTERN_TOP)) {
17608 pm_parser_err_node(parser, node, PM_ERR_PATTERN_HASH_IMPLICIT);
17609 }
17610
17611 return node;
17612 }
17613 case PM_TOKEN_STRING_BEGIN: {
17614 // We need special handling for string beginnings because they could
17615 // be dynamic symbols leading to hash patterns.
17616 node = parse_pattern_primitive(parser, captures, diag_id, (uint16_t) (depth + 1));
17617
17618 if (pm_symbol_node_label_p(node)) {
17619 node = (pm_node_t *) parse_pattern_hash(parser, captures, node, (uint16_t) (depth + 1));
17620
17621 if (!(flags & PM_PARSE_PATTERN_TOP)) {
17622 pm_parser_err_node(parser, node, PM_ERR_PATTERN_HASH_IMPLICIT);
17623 }
17624
17625 return node;
17626 }
17627
// Not a label: continue with the already parsed primitive as the first
// node handed to parse_pattern_primitives.
17628 node = parse_pattern_primitives(parser, captures, node, diag_id, (uint16_t) (depth + 1));
17629 break;
17630 }
17631 case PM_TOKEN_USTAR: {
// A leading rest pattern is only taken here when the caller set the TOP
// or MULTI flag; without either flag control reaches the default case.
17632 if (flags & (PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI)) {
17633 parser_lex(parser);
17634 node = (pm_node_t *) parse_pattern_rest(parser, captures);
17635 leading_rest = true;
17636 break;
17637 }
17638 }
17639 /* fallthrough */
17640 default:
17641 node = parse_pattern_primitives(parser, captures, NULL, diag_id, (uint16_t) (depth + 1));
17642 break;
17643 }
17644
17645 // If we got a dynamic label symbol, then we need to treat it like the
17646 // beginning of a hash pattern.
17647 if (pm_symbol_node_label_p(node)) {
17648 return (pm_node_t *) parse_pattern_hash(parser, captures, node, (uint16_t) (depth + 1));
17649 }
17650
17651 if ((flags & PM_PARSE_PATTERN_MULTI) && match1(parser, PM_TOKEN_COMMA)) {
17652 // If we have a comma, then we are now parsing either an array pattern
17653 // or a find pattern. We need to parse all of the patterns, put them
17654 // into a big list, and then determine which type of node we have.
17655 pm_node_list_t nodes = { 0 };
17656 pm_node_list_append(&nodes, node);
17657
17658 // Gather up all of the patterns into the list.
17659 while (accept1(parser, PM_TOKEN_COMMA)) {
17660 // Break early here in case we have a trailing comma.
// NOTE(review): the condition that opens this early-break block is not
// visible here; the braces below only balance if an `if (...) {` line
// precedes them. Confirm the exact condition against the full source.
// The visible body records the trailing comma as an implicit rest node.
17662 node = (pm_node_t *) pm_implicit_rest_node_create(parser, &parser->previous);
17663 pm_node_list_append(&nodes, node);
17664 trailing_rest = true;
17665 break;
17666 }
17667
17668 if (accept1(parser, PM_TOKEN_USTAR)) {
17669 node = (pm_node_t *) parse_pattern_rest(parser, captures);
17670
17671 // If we have already parsed a splat pattern, then this is an
17672 // error. We will continue to parse the rest of the patterns,
17673 // but we will indicate it as an error.
17674 if (trailing_rest) {
17675 pm_parser_err_previous(parser, PM_ERR_PATTERN_REST);
17676 }
17677
17678 trailing_rest = true;
17679 } else {
17680 node = parse_pattern_primitives(parser, captures, NULL, PM_ERR_PATTERN_EXPRESSION_AFTER_COMMA, (uint16_t) (depth + 1));
17681 }
17682
17683 pm_node_list_append(&nodes, node);
17684 }
17685
17686 // If the first pattern and the last pattern are rest patterns, then we
17687 // will call this a find pattern, regardless of how many rest patterns
17688 // are in between because we know we already added the appropriate
17689 // errors. Otherwise we will create an array pattern.
17690 if (leading_rest && PM_NODE_TYPE_P(nodes.nodes[nodes.size - 1], PM_SPLAT_NODE)) {
17691 node = (pm_node_t *) pm_find_pattern_node_create(parser, &nodes);
17692
// Exactly two entries means the two rests have nothing between them.
17693 if (nodes.size == 2) {
17694 pm_parser_err_node(parser, node, PM_ERR_PATTERN_FIND_MISSING_INNER);
17695 }
17696 } else {
17697 node = (pm_node_t *) pm_array_pattern_node_node_list_create(parser, &nodes);
17698
17699 if (leading_rest && trailing_rest) {
17700 pm_parser_err_node(parser, node, PM_ERR_PATTERN_ARRAY_MULTIPLE_RESTS);
17701 }
17702 }
17703
// Only the list's backing array is released here; the nodes themselves
// were handed to the pattern node created above.
17704 xfree(nodes.nodes);
17705 } else if (leading_rest) {
17706 // Otherwise, if we parsed a single splat pattern, then we know we have
17707 // an array pattern, so we can go ahead and create that node.
17708 node = (pm_node_t *) pm_array_pattern_node_rest_create(parser, node);
17709 }
17710
17711 return node;
17712}
17713
17719static inline void
17720parse_negative_numeric(pm_node_t *node) {
17721 switch (PM_NODE_TYPE(node)) {
17722 case PM_INTEGER_NODE: {
17723 pm_integer_node_t *cast = (pm_integer_node_t *) node;
17724 cast->base.location.start--;
17725 cast->value.negative = true;
17726 break;
17727 }
17728 case PM_FLOAT_NODE: {
17729 pm_float_node_t *cast = (pm_float_node_t *) node;
17730 cast->base.location.start--;
17731 cast->value = -cast->value;
17732 break;
17733 }
17734 case PM_RATIONAL_NODE: {
17735 pm_rational_node_t *cast = (pm_rational_node_t *) node;
17736 cast->base.location.start--;
17737 cast->numerator.negative = true;
17738 break;
17739 }
17740 case PM_IMAGINARY_NODE:
17741 node->location.start--;
17742 parse_negative_numeric(((pm_imaginary_node_t *) node)->numeric);
17743 break;
17744 default:
17745 assert(false && "unreachable");
17746 break;
17747 }
17748}
17749
17755static void
17756pm_parser_err_prefix(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
17757 switch (diag_id) {
17758 case PM_ERR_HASH_KEY: {
17759 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, diag_id, pm_token_type_human(parser->previous.type));
17760 break;
17761 }
17762 case PM_ERR_HASH_VALUE:
17763 case PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR: {
17764 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, diag_id, pm_token_type_human(parser->current.type));
17765 break;
17766 }
17767 case PM_ERR_UNARY_RECEIVER: {
17768 const char *human = (parser->current.type == PM_TOKEN_EOF ? "end-of-input" : pm_token_type_human(parser->current.type));
17769 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, diag_id, human, parser->previous.start[0]);
17770 break;
17771 }
17772 case PM_ERR_UNARY_DISALLOWED:
17773 case PM_ERR_EXPECT_ARGUMENT: {
17774 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, diag_id, pm_token_type_human(parser->current.type));
17775 break;
17776 }
17777 default:
17778 pm_parser_err_previous(parser, diag_id);
17779 break;
17780 }
17781}
17782
/**
 * Check that the retry keyword represented by `node` appears somewhere it is
 * allowed. The context stack is walked outward from parser->current_context:
 * reaching a "good" context returns silently, reaching a "bad" context
 * reports an error on `node` and returns, and every other context moves on to
 * its parent. If the chain runs out, nothing is reported.
 *
 * `context` remembers whether an else or ensure context was crossed on the
 * way out, which selects the more specific error: 
 * PM_ERR_INVALID_RETRY_AFTER_ELSE or PM_ERR_INVALID_RETRY_AFTER_ENSURE instead
 * of PM_ERR_INVALID_RETRY_WITHOUT_RESCUE.
 */
17786static void
17787parse_retry(pm_parser_t *parser, const pm_node_t *node) {
// Function-local helper macros; each one is undefined again at the bottom.
17788#define CONTEXT_NONE 0
17789#define CONTEXT_THROUGH_ENSURE 1
17790#define CONTEXT_THROUGH_ELSE 2
17791
17792 pm_context_node_t *context_node = parser->current_context;
17793 int context = CONTEXT_NONE;
17794
17795 while (context_node != NULL) {
17796 switch (context_node->context) {
17804 case PM_CONTEXT_DEFINED:
17806 // These are the good cases. We're allowed to have a retry here.
17807 return;
17808 case PM_CONTEXT_CLASS:
17809 case PM_CONTEXT_DEF:
17811 case PM_CONTEXT_MAIN:
17812 case PM_CONTEXT_MODULE:
17813 case PM_CONTEXT_PREEXE:
17814 case PM_CONTEXT_SCLASS:
17815 // These are the bad cases. We're not allowed to have a retry in
17816 // these contexts.
17817 if (context == CONTEXT_NONE) {
17818 pm_parser_err_node(parser, node, PM_ERR_INVALID_RETRY_WITHOUT_RESCUE);
17819 } else if (context == CONTEXT_THROUGH_ENSURE) {
17820 pm_parser_err_node(parser, node, PM_ERR_INVALID_RETRY_AFTER_ENSURE);
17821 } else if (context == CONTEXT_THROUGH_ELSE) {
17822 pm_parser_err_node(parser, node, PM_ERR_INVALID_RETRY_AFTER_ELSE);
17823 }
17824 return;
// NOTE(review): no case labels are visible in front of this group; as
// shown it cannot be reached. Confirm the labels against the full source.
17832 // These are also bad cases, but with a more specific error
17833 // message indicating the else.
17834 context = CONTEXT_THROUGH_ELSE;
17835 break;
// NOTE(review): same as above, the case labels for this group are not
// visible here. Confirm against the full source.
17843 // These are also bad cases, but with a more specific error
17844 // message indicating the ensure.
17845 context = CONTEXT_THROUGH_ENSURE;
17846 break;
17847 case PM_CONTEXT_NONE:
17848 // This case should never happen.
17849 assert(false && "unreachable");
17850 break;
17851 case PM_CONTEXT_BEGIN:
17854 case PM_CONTEXT_CASE_IN:
17857 case PM_CONTEXT_ELSE:
17858 case PM_CONTEXT_ELSIF:
17859 case PM_CONTEXT_EMBEXPR:
17861 case PM_CONTEXT_FOR:
17862 case PM_CONTEXT_IF:
17867 case PM_CONTEXT_PARENS:
17868 case PM_CONTEXT_POSTEXE:
17870 case PM_CONTEXT_TERNARY:
17871 case PM_CONTEXT_UNLESS:
17872 case PM_CONTEXT_UNTIL:
17873 case PM_CONTEXT_WHILE:
17874 // In these contexts we should continue walking up the list of
17875 // contexts.
17876 break;
17877 }
17878
17879 context_node = context_node->prev;
17880 }
17881
// Undefine exactly the names defined at the top of the function so that none
// of the helper macros stays visible to the code that follows.
17882#undef CONTEXT_NONE
17883#undef CONTEXT_THROUGH_ENSURE
17884#undef CONTEXT_THROUGH_ELSE
17885}
17886
/**
 * Check that the yield represented by `node` appears somewhere it is allowed.
 * The context stack is walked outward from parser->current_context: reaching
 * a "good" context returns silently, reaching a "bad" context reports
 * PM_ERR_INVALID_YIELD on `node` and returns, and every other context moves
 * on to its parent. If the chain runs out, nothing is reported.
 *
 * NOTE(review): the listing numbers skip inside the case lists below, so more
 * case labels may exist than are visible here. Confirm against the full
 * source before relying on the exact set of contexts.
 */
17890static void
17891parse_yield(pm_parser_t *parser, const pm_node_t *node) {
17892 pm_context_node_t *context_node = parser->current_context;
17893
17894 while (context_node != NULL) {
17895 switch (context_node->context) {
17896 case PM_CONTEXT_DEF:
17898 case PM_CONTEXT_DEFINED:
17902 // These are the good cases. We're allowed to have a yield in
17903 // these contexts.
17904 return;
17905 case PM_CONTEXT_CLASS:
17909 case PM_CONTEXT_MAIN:
17910 case PM_CONTEXT_MODULE:
17914 case PM_CONTEXT_SCLASS:
17918 // These are the bad cases. We're not allowed to have a yield in
17919 // these contexts.
17920 pm_parser_err_node(parser, node, PM_ERR_INVALID_YIELD);
17921 return;
17922 case PM_CONTEXT_NONE:
17923 // This case should never happen.
17924 assert(false && "unreachable");
17925 break;
17926 case PM_CONTEXT_BEGIN:
17935 case PM_CONTEXT_CASE_IN:
17938 case PM_CONTEXT_ELSE:
17939 case PM_CONTEXT_ELSIF:
17940 case PM_CONTEXT_EMBEXPR:
17942 case PM_CONTEXT_FOR:
17943 case PM_CONTEXT_IF:
17951 case PM_CONTEXT_PARENS:
17952 case PM_CONTEXT_POSTEXE:
17954 case PM_CONTEXT_PREEXE:
17956 case PM_CONTEXT_TERNARY:
17957 case PM_CONTEXT_UNLESS:
17958 case PM_CONTEXT_UNTIL:
17959 case PM_CONTEXT_WHILE:
17960 // In these contexts we should continue walking up the list of
17961 // contexts.
17962 break;
17963 }
17964
17965 context_node = context_node->prev;
17966 }
17967}
17968
17973typedef struct {
17976
17978 const uint8_t *start;
17979
17981 const uint8_t *end;
17982
17991
/**
 * Error callback handed to pm_regexp_parse. Reports `message` on the parser as
 * a PM_ERR_REGEXP_PARSE_ERROR diagnostic.
 *
 * The location of the diagnostic depends on callback_data->shared: when it is
 * set, the `start`/`end` pointers given to the callback are used directly;
 * otherwise the start/end stored in the callback data are used instead.
 *
 * NOTE(review): the declaration of `callback_data` is not visible here;
 * presumably it is `data` cast to the callback data struct. Confirm against
 * the full source.
 */
17996static void
17997parse_regular_expression_error(const uint8_t *start, const uint8_t *end, const char *message, void *data) {
17999 pm_location_t location;
18000
18001 if (callback_data->shared) {
// The offsets reported by the regexp parser are used as the location.
18002 location = (pm_location_t) { .start = start, .end = end };
18003 } else {
// Fall back to the location stored in the callback data.
18004 location = (pm_location_t) { .start = callback_data->start, .end = callback_data->end };
18005 }
18006
18007 PM_PARSER_ERR_FORMAT(callback_data->parser, location.start, location.end, PM_ERR_REGEXP_PARSE_ERROR, message);
18008}
18009
/**
 * Run pm_regexp_parse over the unescaped source of the regular expression
 * `node`, with parse_regular_expression_error installed as the error callback
 * so that problems are reported on `parser`.
 *
 * The callback data carries the parser, the node's start/end location, and
 * whether the unescaped string is of type PM_STRING_SHARED.
 *
 * NOTE(review): the line that opens the `error_data` initializer is not
 * visible here; only its designated fields are. Confirm against the full
 * source.
 */
18013static void
18014parse_regular_expression_errors(pm_parser_t *parser, pm_regular_expression_node_t *node) {
18015 const pm_string_t *unescaped = &node->unescaped;
18017 .parser = parser,
18018 .start = node->base.location.start,
18019 .end = node->base.location.end,
18020 .shared = unescaped->type == PM_STRING_SHARED
18021 };
18022
// The node's EXTENDED flag is passed along, the two NULL arguments are left
// unset (NOTE(review): their meaning is not visible here), and &error_data is
// handed to the error callback as its `data` argument.
18023 pm_regexp_parse(parser, pm_string_source(unescaped), pm_string_length(unescaped), PM_NODE_FLAG_P(node, PM_REGULAR_EXPRESSION_FLAGS_EXTENDED), NULL, NULL, parse_regular_expression_error, &error_data);
18024}
18025
18029static inline pm_node_t *
18030parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, bool accepts_label, pm_diagnostic_id_t diag_id, uint16_t depth) {
18031 switch (parser->current.type) {
18033 parser_lex(parser);
18034
18035 pm_array_node_t *array = pm_array_node_create(parser, &parser->previous);
18036 pm_accepts_block_stack_push(parser, true);
18037 bool parsed_bare_hash = false;
18038
18039 while (!match2(parser, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_EOF)) {
18040 bool accepted_newline = accept1(parser, PM_TOKEN_NEWLINE);
18041
18042 // Handle the case where we don't have a comma and we have a
18043 // newline followed by a right bracket.
18044 if (accepted_newline && match1(parser, PM_TOKEN_BRACKET_RIGHT)) {
18045 break;
18046 }
18047
18048 // Ensure that we have a comma between elements in the array.
18049 if (array->elements.size > 0) {
18050 if (accept1(parser, PM_TOKEN_COMMA)) {
18051 // If there was a comma but we also accepts a newline,
18052 // then this is a syntax error.
18053 if (accepted_newline) {
18054 pm_parser_err_previous(parser, PM_ERR_INVALID_COMMA);
18055 }
18056 } else {
18057 // If there was no comma, then we need to add a syntax
18058 // error.
18059 const uint8_t *location = parser->previous.end;
18060 PM_PARSER_ERR_FORMAT(parser, location, location, PM_ERR_ARRAY_SEPARATOR, pm_token_type_human(parser->current.type));
18061
18062 parser->previous.start = location;
18063 parser->previous.type = PM_TOKEN_MISSING;
18064 }
18065 }
18066
18067 // If we have a right bracket immediately following a comma,
18068 // this is allowed since it's a trailing comma. In this case we
18069 // can break out of the loop.
18070 if (match1(parser, PM_TOKEN_BRACKET_RIGHT)) break;
18071
18072 pm_node_t *element;
18073
18074 if (accept1(parser, PM_TOKEN_USTAR)) {
18075 pm_token_t operator = parser->previous;
18076 pm_node_t *expression = NULL;
18077
18078 if (match3(parser, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_COMMA, PM_TOKEN_EOF)) {
18079 pm_parser_scope_forwarding_positionals_check(parser, &operator);
18080 } else {
18081 expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_ARRAY_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
18082 }
18083
18084 element = (pm_node_t *) pm_splat_node_create(parser, &operator, expression);
18085 } else if (match2(parser, PM_TOKEN_LABEL, PM_TOKEN_USTAR_STAR)) {
18086 if (parsed_bare_hash) {
18087 pm_parser_err_current(parser, PM_ERR_EXPRESSION_BARE_HASH);
18088 }
18089
18090 element = (pm_node_t *) pm_keyword_hash_node_create(parser);
18091 pm_static_literals_t hash_keys = { 0 };
18092
18094 parse_assocs(parser, &hash_keys, element, (uint16_t) (depth + 1));
18095 }
18096
18097 pm_static_literals_free(&hash_keys);
18098 parsed_bare_hash = true;
18099 } else {
18100 element = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, true, PM_ERR_ARRAY_EXPRESSION, (uint16_t) (depth + 1));
18101
18102 if (pm_symbol_node_label_p(element) || accept1(parser, PM_TOKEN_EQUAL_GREATER)) {
18103 if (parsed_bare_hash) {
18104 pm_parser_err_previous(parser, PM_ERR_EXPRESSION_BARE_HASH);
18105 }
18106
18107 pm_keyword_hash_node_t *hash = pm_keyword_hash_node_create(parser);
18108 pm_static_literals_t hash_keys = { 0 };
18109 pm_hash_key_static_literals_add(parser, &hash_keys, element);
18110
18111 pm_token_t operator;
18112 if (parser->previous.type == PM_TOKEN_EQUAL_GREATER) {
18113 operator = parser->previous;
18114 } else {
18115 operator = not_provided(parser);
18116 }
18117
18118 pm_node_t *value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_HASH_VALUE, (uint16_t) (depth + 1));
18119 pm_node_t *assoc = (pm_node_t *) pm_assoc_node_create(parser, element, &operator, value);
18120 pm_keyword_hash_node_elements_append(hash, assoc);
18121
18122 element = (pm_node_t *) hash;
18123 if (accept1(parser, PM_TOKEN_COMMA) && !match1(parser, PM_TOKEN_BRACKET_RIGHT)) {
18124 parse_assocs(parser, &hash_keys, element, (uint16_t) (depth + 1));
18125 }
18126
18127 pm_static_literals_free(&hash_keys);
18128 parsed_bare_hash = true;
18129 }
18130 }
18131
18132 pm_array_node_elements_append(array, element);
18133 if (PM_NODE_TYPE_P(element, PM_MISSING_NODE)) break;
18134 }
18135
18136 accept1(parser, PM_TOKEN_NEWLINE);
18137
18138 if (!accept1(parser, PM_TOKEN_BRACKET_RIGHT)) {
18139 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_ARRAY_TERM, pm_token_type_human(parser->current.type));
18140 parser->previous.start = parser->previous.end;
18141 parser->previous.type = PM_TOKEN_MISSING;
18142 }
18143
18144 pm_array_node_close_set(array, &parser->previous);
18145 pm_accepts_block_stack_pop(parser);
18146
18147 return (pm_node_t *) array;
18148 }
18151 pm_token_t opening = parser->current;
18152
18153 pm_node_list_t current_block_exits = { 0 };
18154 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
18155
18156 parser_lex(parser);
18157 while (accept2(parser, PM_TOKEN_SEMICOLON, PM_TOKEN_NEWLINE));
18158
18159 // If this is the end of the file or we match a right parenthesis, then
18160 // we have an empty parentheses node, and we can immediately return.
18161 if (match2(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_EOF)) {
18162 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
18163
18164 pop_block_exits(parser, previous_block_exits);
18165 pm_node_list_free(&current_block_exits);
18166
18167 return (pm_node_t *) pm_parentheses_node_create(parser, &opening, NULL, &parser->previous);
18168 }
18169
18170 // Otherwise, we're going to parse the first statement in the list
18171 // of statements within the parentheses.
18172 pm_accepts_block_stack_push(parser, true);
18173 context_push(parser, PM_CONTEXT_PARENS);
18174 pm_node_t *statement = parse_expression(parser, PM_BINDING_POWER_STATEMENT, true, false, PM_ERR_CANNOT_PARSE_EXPRESSION, (uint16_t) (depth + 1));
18175 context_pop(parser);
18176
18177 // Determine if this statement is followed by a terminator. In the
18178 // case of a single statement, this is fine. But in the case of
18179 // multiple statements it's required.
18180 bool terminator_found = accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
18181 if (terminator_found) {
18182 while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
18183 }
18184
18185 // If we hit a right parenthesis, then we're done parsing the
18186 // parentheses node, and we can check which kind of node we should
18187 // return.
18188 if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
18190 lex_state_set(parser, PM_LEX_STATE_ENDARG);
18191 }
18192
18193 parser_lex(parser);
18194 pm_accepts_block_stack_pop(parser);
18195
18196 pop_block_exits(parser, previous_block_exits);
18197 pm_node_list_free(&current_block_exits);
18198
18199 if (PM_NODE_TYPE_P(statement, PM_MULTI_TARGET_NODE) || PM_NODE_TYPE_P(statement, PM_SPLAT_NODE)) {
18200 // If we have a single statement and are ending on a right
18201 // parenthesis, then we need to check if this is possibly a
18202 // multiple target node.
18203 pm_multi_target_node_t *multi_target;
18204
18205 if (PM_NODE_TYPE_P(statement, PM_MULTI_TARGET_NODE) && ((pm_multi_target_node_t *) statement)->lparen_loc.start == NULL) {
18206 multi_target = (pm_multi_target_node_t *) statement;
18207 } else {
18208 multi_target = pm_multi_target_node_create(parser);
18209 pm_multi_target_node_targets_append(parser, multi_target, statement);
18210 }
18211
18212 pm_location_t lparen_loc = PM_LOCATION_TOKEN_VALUE(&opening);
18213 pm_location_t rparen_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
18214
18215 multi_target->lparen_loc = lparen_loc;
18216 multi_target->rparen_loc = rparen_loc;
18217 multi_target->base.location.start = lparen_loc.start;
18218 multi_target->base.location.end = rparen_loc.end;
18219
18220 pm_node_t *result;
18221 if (match1(parser, PM_TOKEN_COMMA) && (binding_power == PM_BINDING_POWER_STATEMENT)) {
18222 result = parse_targets(parser, (pm_node_t *) multi_target, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
18223 accept1(parser, PM_TOKEN_NEWLINE);
18224 } else {
18225 result = (pm_node_t *) multi_target;
18226 }
18227
18228 if (context_p(parser, PM_CONTEXT_MULTI_TARGET)) {
18229 // All set, this is explicitly allowed by the parent
18230 // context.
18231 } else if (context_p(parser, PM_CONTEXT_FOR_INDEX) && match1(parser, PM_TOKEN_KEYWORD_IN)) {
18232 // All set, we're inside a for loop and we're parsing
18233 // multiple targets.
18234 } else if (binding_power != PM_BINDING_POWER_STATEMENT) {
18235 // Multi targets are not allowed when it's not a
18236 // statement level.
18237 pm_parser_err_node(parser, result, PM_ERR_WRITE_TARGET_UNEXPECTED);
18238 } else if (!match2(parser, PM_TOKEN_EQUAL, PM_TOKEN_PARENTHESIS_RIGHT)) {
18239 // Multi targets must be followed by an equal sign in
18240 // order to be valid (or a right parenthesis if they are
18241 // nested).
18242 pm_parser_err_node(parser, result, PM_ERR_WRITE_TARGET_UNEXPECTED);
18243 }
18244
18245 return result;
18246 }
18247
18248 // If we have a single statement and are ending on a right parenthesis
18249 // and we didn't return a multiple assignment node, then we can return a
18250 // regular parentheses node now.
18251 pm_statements_node_t *statements = pm_statements_node_create(parser);
18252 pm_statements_node_body_append(parser, statements, statement, true);
18253
18254 return (pm_node_t *) pm_parentheses_node_create(parser, &opening, (pm_node_t *) statements, &parser->previous);
18255 }
18256
18257 // If we have more than one statement in the set of parentheses,
18258 // then we are going to parse all of them as a list of statements.
18259 // We'll do that here.
18260 context_push(parser, PM_CONTEXT_PARENS);
18261 pm_statements_node_t *statements = pm_statements_node_create(parser);
18262 pm_statements_node_body_append(parser, statements, statement, true);
18263
18264 // If we didn't find a terminator and we didn't find a right
18265 // parenthesis, then this is a syntax error.
18266 if (!terminator_found && !match1(parser, PM_TOKEN_EOF)) {
18267 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(parser->current.type));
18268 }
18269
18270 // Parse each statement within the parentheses.
18271 while (true) {
18272 pm_node_t *node = parse_expression(parser, PM_BINDING_POWER_STATEMENT, true, false, PM_ERR_CANNOT_PARSE_EXPRESSION, (uint16_t) (depth + 1));
18273 pm_statements_node_body_append(parser, statements, node, true);
18274
18275 // If we're recovering from a syntax error, then we need to stop
18276 // parsing the statements now.
18277 if (parser->recovering) {
18278 // If this is the level of context where the recovery has
18279 // happened, then we can mark the parser as done recovering.
18280 if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) parser->recovering = false;
18281 break;
18282 }
18283
18284 // If we couldn't parse an expression at all, then we need to
18285 // bail out of the loop.
18286 if (PM_NODE_TYPE_P(node, PM_MISSING_NODE)) break;
18287
18288 // If we successfully parsed a statement, then we are going to
18289 // need terminator to delimit them.
18290 if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
18291 while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
18292 if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) break;
18293 } else if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
18294 break;
18295 } else if (!match1(parser, PM_TOKEN_EOF)) {
18296 // If we're at the end of the file, then we're going to add
18297 // an error after this for the ) anyway.
18298 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(parser->current.type));
18299 }
18300 }
18301
18302 context_pop(parser);
18303 pm_accepts_block_stack_pop(parser);
18304 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
18305
18306 // When we're parsing multi targets, we allow them to be followed by
18307 // a right parenthesis if they are at the statement level. This is
18308 // only possible if they are the final statement in a parentheses.
18309 // We need to explicitly reject that here.
18310 {
18311 pm_node_t *statement = statements->body.nodes[statements->body.size - 1];
18312
18313 if (PM_NODE_TYPE_P(statement, PM_SPLAT_NODE)) {
18314 pm_multi_target_node_t *multi_target = pm_multi_target_node_create(parser);
18315 pm_multi_target_node_targets_append(parser, multi_target, statement);
18316
18317 statement = (pm_node_t *) multi_target;
18318 statements->body.nodes[statements->body.size - 1] = statement;
18319 }
18320
18321 if (PM_NODE_TYPE_P(statement, PM_MULTI_TARGET_NODE)) {
18322 const uint8_t *offset = statement->location.end;
18323 pm_token_t operator = { .type = PM_TOKEN_EQUAL, .start = offset, .end = offset };
18324 pm_node_t *value = (pm_node_t *) pm_missing_node_create(parser, offset, offset);
18325
18326 statement = (pm_node_t *) pm_multi_write_node_create(parser, (pm_multi_target_node_t *) statement, &operator, value);
18327 statements->body.nodes[statements->body.size - 1] = statement;
18328
18329 pm_parser_err_node(parser, statement, PM_ERR_WRITE_TARGET_UNEXPECTED);
18330 }
18331 }
18332
18333 pop_block_exits(parser, previous_block_exits);
18334 pm_node_list_free(&current_block_exits);
18335
18336 pm_void_statements_check(parser, statements, true);
18337 return (pm_node_t *) pm_parentheses_node_create(parser, &opening, (pm_node_t *) statements, &parser->previous);
18338 }
18339 case PM_TOKEN_BRACE_LEFT: {
18340 // If we were passed a current_hash_keys via the parser, then that
18341 // means we're already parsing a hash and we want to share the set
18342 // of hash keys with this inner hash we're about to parse for the
18343 // sake of warnings. We'll set it to NULL after we grab it to make
18344 // sure subsequent expressions don't use it. Effectively this is a
18345 // way of getting around passing it to every call to
18346 // parse_expression.
18347 pm_static_literals_t *current_hash_keys = parser->current_hash_keys;
18348 parser->current_hash_keys = NULL;
18349
18350 pm_accepts_block_stack_push(parser, true);
18351 parser_lex(parser);
18352
18353 pm_hash_node_t *node = pm_hash_node_create(parser, &parser->previous);
18354
18355 if (!match2(parser, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_EOF)) {
18356 if (current_hash_keys != NULL) {
18357 parse_assocs(parser, current_hash_keys, (pm_node_t *) node, (uint16_t) (depth + 1));
18358 } else {
18359 pm_static_literals_t hash_keys = { 0 };
18360 parse_assocs(parser, &hash_keys, (pm_node_t *) node, (uint16_t) (depth + 1));
18361 pm_static_literals_free(&hash_keys);
18362 }
18363
18364 accept1(parser, PM_TOKEN_NEWLINE);
18365 }
18366
18367 pm_accepts_block_stack_pop(parser);
18368 expect1(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_HASH_TERM);
18369 pm_hash_node_closing_loc_set(node, &parser->previous);
18370
18371 return (pm_node_t *) node;
18372 }
18374 parser_lex(parser);
18375
18376 pm_token_t opening = parser->previous;
18377 opening.type = PM_TOKEN_STRING_BEGIN;
18378 opening.end = opening.start + 1;
18379
18380 pm_token_t content = parser->previous;
18381 content.type = PM_TOKEN_STRING_CONTENT;
18382 content.start = content.start + 1;
18383
18384 pm_token_t closing = not_provided(parser);
18385 pm_node_t *node = (pm_node_t *) pm_string_node_create_current_string(parser, &opening, &content, &closing);
18386 pm_node_flag_set(node, parse_unescaped_encoding(parser));
18387
18388 // Characters can be followed by strings in which case they are
18389 // automatically concatenated.
18390 if (match1(parser, PM_TOKEN_STRING_BEGIN)) {
18391 return parse_strings(parser, node, false, (uint16_t) (depth + 1));
18392 }
18393
18394 return node;
18395 }
18397 parser_lex(parser);
18398 pm_node_t *node = (pm_node_t *) pm_class_variable_read_node_create(parser, &parser->previous);
18399
18400 if (binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
18401 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
18402 }
18403
18404 return node;
18405 }
18406 case PM_TOKEN_CONSTANT: {
18407 parser_lex(parser);
18408 pm_token_t constant = parser->previous;
18409
18410 // If a constant is immediately followed by parentheses, then this is in
18411 // fact a method call, not a constant read.
18412 if (
18413 match1(parser, PM_TOKEN_PARENTHESIS_LEFT) ||
18414 (accepts_command_call && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR))) ||
18415 (pm_accepts_block_stack_p(parser) && match1(parser, PM_TOKEN_KEYWORD_DO)) ||
18416 match1(parser, PM_TOKEN_BRACE_LEFT)
18417 ) {
18418 pm_arguments_t arguments = { 0 };
18419 parse_arguments_list(parser, &arguments, true, accepts_command_call, (uint16_t) (depth + 1));
18420 return (pm_node_t *) pm_call_node_fcall_create(parser, &constant, &arguments);
18421 }
18422
18423 pm_node_t *node = (pm_node_t *) pm_constant_read_node_create(parser, &parser->previous);
18424
18425 if ((binding_power == PM_BINDING_POWER_STATEMENT) && match1(parser, PM_TOKEN_COMMA)) {
18426 // If we get here, then we have a comma immediately following a
18427 // constant, so we're going to parse this as a multiple assignment.
18428 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
18429 }
18430
18431 return node;
18432 }
18433 case PM_TOKEN_UCOLON_COLON: {
18434 parser_lex(parser);
18435 pm_token_t delimiter = parser->previous;
18436
18437 expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
18438 pm_node_t *node = (pm_node_t *) pm_constant_path_node_create(parser, NULL, &delimiter, &parser->previous);
18439
18440 if ((binding_power == PM_BINDING_POWER_STATEMENT) && match1(parser, PM_TOKEN_COMMA)) {
18441 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
18442 }
18443
18444 return node;
18445 }
18446 case PM_TOKEN_UDOT_DOT:
18447 case PM_TOKEN_UDOT_DOT_DOT: {
18448 pm_token_t operator = parser->current;
18449 parser_lex(parser);
18450
18451 pm_node_t *right = parse_expression(parser, pm_binding_powers[operator.type].left, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
18452
18453 // Unary .. and ... are special because these are non-associative
18454 // operators that can also be unary operators. In this case we need
18455 // to explicitly reject code that has a .. or ... that follows this
18456 // expression.
18457 if (match2(parser, PM_TOKEN_DOT_DOT, PM_TOKEN_DOT_DOT_DOT)) {
18458 pm_parser_err_current(parser, PM_ERR_UNEXPECTED_RANGE_OPERATOR);
18459 }
18460
18461 return (pm_node_t *) pm_range_node_create(parser, NULL, &operator, right);
18462 }
18463 case PM_TOKEN_FLOAT:
18464 parser_lex(parser);
18465 return (pm_node_t *) pm_float_node_create(parser, &parser->previous);
18467 parser_lex(parser);
18468 return (pm_node_t *) pm_float_node_imaginary_create(parser, &parser->previous);
18470 parser_lex(parser);
18471 return (pm_node_t *) pm_float_node_rational_create(parser, &parser->previous);
18473 parser_lex(parser);
18474 return (pm_node_t *) pm_float_node_rational_imaginary_create(parser, &parser->previous);
18476 parser_lex(parser);
18477 pm_node_t *node = (pm_node_t *) pm_numbered_reference_read_node_create(parser, &parser->previous);
18478
18479 if (binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
18480 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
18481 }
18482
18483 return node;
18484 }
18486 parser_lex(parser);
18487 pm_node_t *node = (pm_node_t *) pm_global_variable_read_node_create(parser, &parser->previous);
18488
18489 if (binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
18490 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
18491 }
18492
18493 return node;
18494 }
18496 parser_lex(parser);
18497 pm_node_t *node = (pm_node_t *) pm_back_reference_read_node_create(parser, &parser->previous);
18498
18499 if (binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
18500 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
18501 }
18502
18503 return node;
18504 }
18506 case PM_TOKEN_METHOD_NAME: {
18507 parser_lex(parser);
18508 pm_token_t identifier = parser->previous;
18509 pm_node_t *node = parse_variable_call(parser);
18510
18511 if (PM_NODE_TYPE_P(node, PM_CALL_NODE)) {
18512 // If parse_variable_call returned with a call node, then we
18513 // know the identifier is not in the local table. In that case
18514 // we need to check if there are arguments following the
18515 // identifier.
18516 pm_call_node_t *call = (pm_call_node_t *) node;
18517 pm_arguments_t arguments = { 0 };
18518
18519 if (parse_arguments_list(parser, &arguments, true, accepts_command_call, (uint16_t) (depth + 1))) {
18520 // Since we found arguments, we need to turn off the
18521 // variable call bit in the flags.
18522 pm_node_flag_unset((pm_node_t *)call, PM_CALL_NODE_FLAGS_VARIABLE_CALL);
18523
18524 call->opening_loc = arguments.opening_loc;
18525 call->arguments = arguments.arguments;
18526 call->closing_loc = arguments.closing_loc;
18527 call->block = arguments.block;
18528
18529 if (arguments.block != NULL) {
18530 call->base.location.end = arguments.block->location.end;
18531 } else if (arguments.closing_loc.start == NULL) {
18532 if (arguments.arguments != NULL) {
18533 call->base.location.end = arguments.arguments->base.location.end;
18534 } else {
18535 call->base.location.end = call->message_loc.end;
18536 }
18537 } else {
18538 call->base.location.end = arguments.closing_loc.end;
18539 }
18540 }
18541 } else {
18542 // Otherwise, we know the identifier is in the local table. This
18543 // can still be a method call if it is followed by arguments or
18544 // a block, so we need to check for that here.
18545 if (
18546 (accepts_command_call && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR))) ||
18547 (pm_accepts_block_stack_p(parser) && match1(parser, PM_TOKEN_KEYWORD_DO)) ||
18548 match1(parser, PM_TOKEN_BRACE_LEFT)
18549 ) {
18550 pm_arguments_t arguments = { 0 };
18551 parse_arguments_list(parser, &arguments, true, accepts_command_call, (uint16_t) (depth + 1));
18552 pm_call_node_t *fcall = pm_call_node_fcall_create(parser, &identifier, &arguments);
18553
18555 // If we're about to convert an 'it' implicit local
18556 // variable read into a method call, we need to remove
18557 // it from the list of implicit local variables.
18558 parse_target_implicit_parameter(parser, node);
18559 } else {
18560 // Otherwise, we're about to convert a regular local
18561 // variable read into a method call, in which case we
18562 // need to indicate that this was not a read for the
18563 // purposes of warnings.
18565
18566 if (pm_token_is_numbered_parameter(identifier.start, identifier.end)) {
18567 parse_target_implicit_parameter(parser, node);
18568 } else {
18570 pm_locals_unread(&pm_parser_scope_find(parser, cast->depth)->locals, cast->name);
18571 }
18572 }
18573
18574 pm_node_destroy(parser, node);
18575 return (pm_node_t *) fcall;
18576 }
18577 }
18578
18579 if ((binding_power == PM_BINDING_POWER_STATEMENT) && match1(parser, PM_TOKEN_COMMA)) {
18580 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
18581 }
18582
18583 return node;
18584 }
18586 // Here we have found a heredoc. We'll parse it and add it to the
18587 // list of strings.
18588 assert(parser->lex_modes.current->mode == PM_LEX_HEREDOC);
18589 pm_heredoc_lex_mode_t lex_mode = parser->lex_modes.current->as.heredoc.base;
18590
18591 size_t common_whitespace = (size_t) -1;
18592 parser->lex_modes.current->as.heredoc.common_whitespace = &common_whitespace;
18593
18594 parser_lex(parser);
18595 pm_token_t opening = parser->previous;
18596
18597 pm_node_t *node;
18598 pm_node_t *part;
18599
18600 if (match2(parser, PM_TOKEN_HEREDOC_END, PM_TOKEN_EOF)) {
18601 // If we get here, then we have an empty heredoc. We'll create
18602 // an empty content token and return an empty string node.
18603 expect1_heredoc_term(parser, lex_mode.ident_start, lex_mode.ident_length);
18604 pm_token_t content = parse_strings_empty_content(parser->previous.start);
18605
18606 if (lex_mode.quote == PM_HEREDOC_QUOTE_BACKTICK) {
18607 node = (pm_node_t *) pm_xstring_node_create_unescaped(parser, &opening, &content, &parser->previous, &PM_STRING_EMPTY);
18608 } else {
18609 node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &PM_STRING_EMPTY);
18610 }
18611
18612 node->location.end = opening.end;
18613 } else if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) == NULL) {
18614 // If we get here, then we tried to find something in the
18615 // heredoc but couldn't actually parse anything, so we'll just
18616 // return a missing node.
18617 //
18618 // parse_string_part handles its own errors, so there is no need
18619 // for us to add one here.
18620 node = (pm_node_t *) pm_missing_node_create(parser, parser->previous.start, parser->previous.end);
18621 } else if (PM_NODE_TYPE_P(part, PM_STRING_NODE) && match2(parser, PM_TOKEN_HEREDOC_END, PM_TOKEN_EOF)) {
18622 // If we get here, then the part that we parsed was plain string
18623 // content and we're at the end of the heredoc, so we can return
18624 // just a string node with the heredoc opening and closing as
18625 // its opening and closing.
18626 pm_node_flag_set(part, parse_unescaped_encoding(parser));
18627 pm_string_node_t *cast = (pm_string_node_t *) part;
18628
18629 cast->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
18630 cast->closing_loc = PM_LOCATION_TOKEN_VALUE(&parser->current);
18631 cast->base.location = cast->opening_loc;
18632
18633 if (lex_mode.quote == PM_HEREDOC_QUOTE_BACKTICK) {
18634 assert(sizeof(pm_string_node_t) == sizeof(pm_x_string_node_t));
18635 cast->base.type = PM_X_STRING_NODE;
18636 }
18637
18638 if (lex_mode.indent == PM_HEREDOC_INDENT_TILDE && (common_whitespace != (size_t) -1) && (common_whitespace != 0)) {
18639 parse_heredoc_dedent_string(&cast->unescaped, common_whitespace);
18640 }
18641
18642 node = (pm_node_t *) cast;
18643 expect1_heredoc_term(parser, lex_mode.ident_start, lex_mode.ident_length);
18644 } else {
18645 // If we get here, then we have multiple parts in the heredoc,
18646 // so we'll need to create an interpolated string node to hold
18647 // them all.
18648 pm_node_list_t parts = { 0 };
18649 pm_node_list_append(&parts, part);
18650
18651 while (!match2(parser, PM_TOKEN_HEREDOC_END, PM_TOKEN_EOF)) {
18652 if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) {
18653 pm_node_list_append(&parts, part);
18654 }
18655 }
18656
18657 // Now that we have all of the parts, create the correct type of
18658 // interpolated node.
18659 if (lex_mode.quote == PM_HEREDOC_QUOTE_BACKTICK) {
18660 pm_interpolated_x_string_node_t *cast = pm_interpolated_xstring_node_create(parser, &opening, &opening);
18661 cast->parts = parts;
18662
18663 expect1_heredoc_term(parser, lex_mode.ident_start, lex_mode.ident_length);
18664 pm_interpolated_xstring_node_closing_set(cast, &parser->previous);
18665
18666 cast->base.location = cast->opening_loc;
18667 node = (pm_node_t *) cast;
18668 } else {
18669 pm_interpolated_string_node_t *cast = pm_interpolated_string_node_create(parser, &opening, &parts, &opening);
18670 pm_node_list_free(&parts);
18671
18672 expect1_heredoc_term(parser, lex_mode.ident_start, lex_mode.ident_length);
18673 pm_interpolated_string_node_closing_set(cast, &parser->previous);
18674
18675 cast->base.location = cast->opening_loc;
18676 node = (pm_node_t *) cast;
18677 }
18678
18679 // If this is a heredoc that is indented with a ~, then we need
18680 // to dedent each line by the common leading whitespace.
18681 if (lex_mode.indent == PM_HEREDOC_INDENT_TILDE && (common_whitespace != (size_t) -1) && (common_whitespace != 0)) {
18682 pm_node_list_t *nodes;
18683 if (lex_mode.quote == PM_HEREDOC_QUOTE_BACKTICK) {
18684 nodes = &((pm_interpolated_x_string_node_t *) node)->parts;
18685 } else {
18686 nodes = &((pm_interpolated_string_node_t *) node)->parts;
18687 }
18688
18689 parse_heredoc_dedent(parser, nodes, common_whitespace);
18690 }
18691 }
18692
18693 if (match1(parser, PM_TOKEN_STRING_BEGIN)) {
18694 return parse_strings(parser, node, false, (uint16_t) (depth + 1));
18695 }
18696
18697 return node;
18698 }
18700 parser_lex(parser);
18701 pm_node_t *node = (pm_node_t *) pm_instance_variable_read_node_create(parser, &parser->previous);
18702
18703 if (binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
18704 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
18705 }
18706
18707 return node;
18708 }
18709 case PM_TOKEN_INTEGER: {
18710 pm_node_flags_t base = parser->integer_base;
18711 parser_lex(parser);
18712 return (pm_node_t *) pm_integer_node_create(parser, base, &parser->previous);
18713 }
18715 pm_node_flags_t base = parser->integer_base;
18716 parser_lex(parser);
18717 return (pm_node_t *) pm_integer_node_imaginary_create(parser, base, &parser->previous);
18718 }
18720 pm_node_flags_t base = parser->integer_base;
18721 parser_lex(parser);
18722 return (pm_node_t *) pm_integer_node_rational_create(parser, base, &parser->previous);
18723 }
18725 pm_node_flags_t base = parser->integer_base;
18726 parser_lex(parser);
18727 return (pm_node_t *) pm_integer_node_rational_imaginary_create(parser, base, &parser->previous);
18728 }
18730 parser_lex(parser);
18731 return (pm_node_t *) pm_source_encoding_node_create(parser, &parser->previous);
18733 parser_lex(parser);
18734 return (pm_node_t *) pm_source_file_node_create(parser, &parser->previous);
18736 parser_lex(parser);
18737 return (pm_node_t *) pm_source_line_node_create(parser, &parser->previous);
18739 if (binding_power != PM_BINDING_POWER_STATEMENT) {
18740 pm_parser_err_current(parser, PM_ERR_STATEMENT_ALIAS);
18741 }
18742
18743 parser_lex(parser);
18744 pm_token_t keyword = parser->previous;
18745
18746 pm_node_t *new_name = parse_alias_argument(parser, true, (uint16_t) (depth + 1));
18747 pm_node_t *old_name = parse_alias_argument(parser, false, (uint16_t) (depth + 1));
18748
18749 switch (PM_NODE_TYPE(new_name)) {
18755 pm_parser_err_node(parser, old_name, PM_ERR_ALIAS_ARGUMENT_NUMBERED_REFERENCE);
18756 }
18757 } else {
18758 pm_parser_err_node(parser, old_name, PM_ERR_ALIAS_ARGUMENT);
18759 }
18760
18761 return (pm_node_t *) pm_alias_global_variable_node_create(parser, &keyword, new_name, old_name);
18762 }
18763 case PM_SYMBOL_NODE:
18766 pm_parser_err_node(parser, old_name, PM_ERR_ALIAS_ARGUMENT);
18767 }
18768 }
18769 /* fallthrough */
18770 default:
18771 return (pm_node_t *) pm_alias_method_node_create(parser, &keyword, new_name, old_name);
18772 }
18773 }
18774 case PM_TOKEN_KEYWORD_CASE: {
18775 size_t opening_newline_index = token_newline_index(parser);
18776 parser_lex(parser);
18777
18778 pm_token_t case_keyword = parser->previous;
18779 pm_node_t *predicate = NULL;
18780
18781 pm_node_list_t current_block_exits = { 0 };
18782 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
18783
18784 if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
18785 while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
18786 predicate = NULL;
18787 } else if (match3(parser, PM_TOKEN_KEYWORD_WHEN, PM_TOKEN_KEYWORD_IN, PM_TOKEN_KEYWORD_END)) {
18788 predicate = NULL;
18789 } else if (!token_begins_expression_p(parser->current.type)) {
18790 predicate = NULL;
18791 } else {
18792 predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_CASE_EXPRESSION_AFTER_CASE, (uint16_t) (depth + 1));
18793 while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
18794 }
18795
18796 if (match1(parser, PM_TOKEN_KEYWORD_END)) {
18797 parser_warn_indentation_mismatch(parser, opening_newline_index, &case_keyword, false, false);
18798 parser_lex(parser);
18799
18800 pop_block_exits(parser, previous_block_exits);
18801 pm_node_list_free(&current_block_exits);
18802
18803 pm_parser_err_token(parser, &case_keyword, PM_ERR_CASE_MISSING_CONDITIONS);
18804 return (pm_node_t *) pm_case_node_create(parser, &case_keyword, predicate, &parser->previous);
18805 }
18806
18807 // At this point we can create a case node, though we don't yet know
18808 // if it is a case-in or case-when node.
18809 pm_token_t end_keyword = not_provided(parser);
18810 pm_node_t *node;
18811
18812 if (match1(parser, PM_TOKEN_KEYWORD_WHEN)) {
18813 pm_case_node_t *case_node = pm_case_node_create(parser, &case_keyword, predicate, &end_keyword);
18814 pm_static_literals_t literals = { 0 };
18815
18816 // At this point we've seen a when keyword, so we know this is a
18817 // case-when node. We will continue to parse the when nodes
18818 // until we hit the end of the list.
18819 while (match1(parser, PM_TOKEN_KEYWORD_WHEN)) {
18820 parser_warn_indentation_mismatch(parser, opening_newline_index, &case_keyword, false, true);
18821 parser_lex(parser);
18822
18823 pm_token_t when_keyword = parser->previous;
18824 pm_when_node_t *when_node = pm_when_node_create(parser, &when_keyword);
18825
18826 do {
18827 if (accept1(parser, PM_TOKEN_USTAR)) {
18828 pm_token_t operator = parser->previous;
18829 pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
18830
18831 pm_splat_node_t *splat_node = pm_splat_node_create(parser, &operator, expression);
18832 pm_when_node_conditions_append(when_node, (pm_node_t *) splat_node);
18833
18834 if (PM_NODE_TYPE_P(expression, PM_MISSING_NODE)) break;
18835 } else {
18836 pm_node_t *condition = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_CASE_EXPRESSION_AFTER_WHEN, (uint16_t) (depth + 1));
18837 pm_when_node_conditions_append(when_node, condition);
18838
18839 // If we found a missing node, then this is a syntax
18840 // error and we should stop looping.
18841 if (PM_NODE_TYPE_P(condition, PM_MISSING_NODE)) break;
18842
18843 // If this is a string node, then we need to mark it
18844 // as frozen because when clause strings are frozen.
18845 if (PM_NODE_TYPE_P(condition, PM_STRING_NODE)) {
18846 pm_node_flag_set(condition, PM_STRING_FLAGS_FROZEN | PM_NODE_FLAG_STATIC_LITERAL);
18847 } else if (PM_NODE_TYPE_P(condition, PM_SOURCE_FILE_NODE)) {
18848 pm_node_flag_set(condition, PM_NODE_FLAG_STATIC_LITERAL);
18849 }
18850
18851 pm_when_clause_static_literals_add(parser, &literals, condition);
18852 }
18853 } while (accept1(parser, PM_TOKEN_COMMA));
18854
18855 if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
18856 if (accept1(parser, PM_TOKEN_KEYWORD_THEN)) {
18857 pm_when_node_then_keyword_loc_set(when_node, &parser->previous);
18858 }
18859 } else {
18860 expect1(parser, PM_TOKEN_KEYWORD_THEN, PM_ERR_EXPECT_WHEN_DELIMITER);
18861 pm_when_node_then_keyword_loc_set(when_node, &parser->previous);
18862 }
18863
18865 pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_CASE_WHEN, (uint16_t) (depth + 1));
18866 if (statements != NULL) {
18867 pm_when_node_statements_set(when_node, statements);
18868 }
18869 }
18870
18871 pm_case_node_condition_append(case_node, (pm_node_t *) when_node);
18872 }
18873
18874 // If we didn't parse any conditions (in or when) then we need
18875 // to indicate that we have an error.
18876 if (case_node->conditions.size == 0) {
18877 pm_parser_err_token(parser, &case_keyword, PM_ERR_CASE_MISSING_CONDITIONS);
18878 }
18879
18880 pm_static_literals_free(&literals);
18881 node = (pm_node_t *) case_node;
18882 } else {
18883 pm_case_match_node_t *case_node = pm_case_match_node_create(parser, &case_keyword, predicate, &end_keyword);
18884
18885 // If this is a case-match node (i.e., it is a pattern matching
18886 // case statement) then we must have a predicate.
18887 if (predicate == NULL) {
18888 pm_parser_err_token(parser, &case_keyword, PM_ERR_CASE_MATCH_MISSING_PREDICATE);
18889 }
18890
18891 // At this point we expect that we're parsing a case-in node. We
18892 // will continue to parse the in nodes until we hit the end of
18893 // the list.
18894 while (match1(parser, PM_TOKEN_KEYWORD_IN)) {
18895 parser_warn_indentation_mismatch(parser, opening_newline_index, &case_keyword, false, true);
18896
18897 bool previous_pattern_matching_newlines = parser->pattern_matching_newlines;
18898 parser->pattern_matching_newlines = true;
18899
18900 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
18901 parser->command_start = false;
18902 parser_lex(parser);
18903
18904 pm_token_t in_keyword = parser->previous;
18905
18906 pm_constant_id_list_t captures = { 0 };
18907 pm_node_t *pattern = parse_pattern(parser, &captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_IN, (uint16_t) (depth + 1));
18908
18909 parser->pattern_matching_newlines = previous_pattern_matching_newlines;
18910 pm_constant_id_list_free(&captures);
18911
18912 // Since we're in the top-level of the case-in node we need
18913 // to check for guard clauses in the form of `if` or
18914 // `unless` statements.
18915 if (accept1(parser, PM_TOKEN_KEYWORD_IF_MODIFIER)) {
18916 pm_token_t keyword = parser->previous;
18917 pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_CONDITIONAL_IF_PREDICATE, (uint16_t) (depth + 1));
18918 pattern = (pm_node_t *) pm_if_node_modifier_create(parser, pattern, &keyword, predicate);
18919 } else if (accept1(parser, PM_TOKEN_KEYWORD_UNLESS_MODIFIER)) {
18920 pm_token_t keyword = parser->previous;
18921 pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_CONDITIONAL_UNLESS_PREDICATE, (uint16_t) (depth + 1));
18922 pattern = (pm_node_t *) pm_unless_node_modifier_create(parser, pattern, &keyword, predicate);
18923 }
18924
18925 // Now we need to check for the terminator of the in node's
18926 // pattern. It can be a newline or semicolon optionally
18927 // followed by a `then` keyword.
18928 pm_token_t then_keyword;
18929 if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
18930 if (accept1(parser, PM_TOKEN_KEYWORD_THEN)) {
18931 then_keyword = parser->previous;
18932 } else {
18933 then_keyword = not_provided(parser);
18934 }
18935 } else {
18936 expect1(parser, PM_TOKEN_KEYWORD_THEN, PM_ERR_EXPECT_IN_DELIMITER);
18937 then_keyword = parser->previous;
18938 }
18939
18940 // Now we can actually parse the statements associated with
18941 // the in node.
18942 pm_statements_node_t *statements;
18944 statements = NULL;
18945 } else {
18946 statements = parse_statements(parser, PM_CONTEXT_CASE_IN, (uint16_t) (depth + 1));
18947 }
18948
18949 // Now that we have the full pattern and statements, we can
18950 // create the node and attach it to the case node.
18951 pm_node_t *condition = (pm_node_t *) pm_in_node_create(parser, pattern, statements, &in_keyword, &then_keyword);
18952 pm_case_match_node_condition_append(case_node, condition);
18953 }
18954
18955 // If we didn't parse any conditions (in or when) then we need
18956 // to indicate that we have an error.
18957 if (case_node->conditions.size == 0) {
18958 pm_parser_err_token(parser, &case_keyword, PM_ERR_CASE_MISSING_CONDITIONS);
18959 }
18960
18961 node = (pm_node_t *) case_node;
18962 }
18963
18964 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
18965 if (accept1(parser, PM_TOKEN_KEYWORD_ELSE)) {
18966 pm_token_t else_keyword = parser->previous;
18967 pm_else_node_t *else_node;
18968
18969 if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
18970 else_node = pm_else_node_create(parser, &else_keyword, parse_statements(parser, PM_CONTEXT_ELSE, (uint16_t) (depth + 1)), &parser->current);
18971 } else {
18972 else_node = pm_else_node_create(parser, &else_keyword, NULL, &parser->current);
18973 }
18974
18975 if (PM_NODE_TYPE_P(node, PM_CASE_NODE)) {
18976 pm_case_node_else_clause_set((pm_case_node_t *) node, else_node);
18977 } else {
18978 pm_case_match_node_else_clause_set((pm_case_match_node_t *) node, else_node);
18979 }
18980 }
18981
18982 parser_warn_indentation_mismatch(parser, opening_newline_index, &case_keyword, false, false);
18983 expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CASE_TERM);
18984
18985 if (PM_NODE_TYPE_P(node, PM_CASE_NODE)) {
18986 pm_case_node_end_keyword_loc_set((pm_case_node_t *) node, &parser->previous);
18987 } else {
18988 pm_case_match_node_end_keyword_loc_set((pm_case_match_node_t *) node, &parser->previous);
18989 }
18990
18991 pop_block_exits(parser, previous_block_exits);
18992 pm_node_list_free(&current_block_exits);
18993
18994 return node;
18995 }
18997 size_t opening_newline_index = token_newline_index(parser);
18998 parser_lex(parser);
18999
19000 pm_token_t begin_keyword = parser->previous;
19001 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
19002
19003 pm_node_list_t current_block_exits = { 0 };
19004 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
19005 pm_statements_node_t *begin_statements = NULL;
19006
19008 pm_accepts_block_stack_push(parser, true);
19009 begin_statements = parse_statements(parser, PM_CONTEXT_BEGIN, (uint16_t) (depth + 1));
19010 pm_accepts_block_stack_pop(parser);
19011 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
19012 }
19013
19014 pm_begin_node_t *begin_node = pm_begin_node_create(parser, &begin_keyword, begin_statements);
19015 parse_rescues(parser, opening_newline_index, &begin_keyword, begin_node, PM_RESCUES_BEGIN, (uint16_t) (depth + 1));
19016 expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_BEGIN_TERM);
19017
19018 begin_node->base.location.end = parser->previous.end;
19019 pm_begin_node_end_keyword_set(begin_node, &parser->previous);
19020
19021 pop_block_exits(parser, previous_block_exits);
19022 pm_node_list_free(&current_block_exits);
19023
19024 return (pm_node_t *) begin_node;
19025 }
19027 pm_node_list_t current_block_exits = { 0 };
19028 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
19029
19030 if (binding_power != PM_BINDING_POWER_STATEMENT) {
19031 pm_parser_err_current(parser, PM_ERR_STATEMENT_PREEXE_BEGIN);
19032 }
19033
19034 parser_lex(parser);
19035 pm_token_t keyword = parser->previous;
19036
19037 expect1(parser, PM_TOKEN_BRACE_LEFT, PM_ERR_BEGIN_UPCASE_BRACE);
19038 pm_token_t opening = parser->previous;
19039 pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_PREEXE, (uint16_t) (depth + 1));
19040
19041 expect1(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_BEGIN_UPCASE_TERM);
19042 pm_context_t context = parser->current_context->context;
19043 if ((context != PM_CONTEXT_MAIN) && (context != PM_CONTEXT_PREEXE)) {
19044 pm_parser_err_token(parser, &keyword, PM_ERR_BEGIN_UPCASE_TOPLEVEL);
19045 }
19046
19047 flush_block_exits(parser, previous_block_exits);
19048 pm_node_list_free(&current_block_exits);
19049
19050 return (pm_node_t *) pm_pre_execution_node_create(parser, &keyword, &opening, statements, &parser->previous);
19051 }
19055 parser_lex(parser);
19056
19057 pm_token_t keyword = parser->previous;
19058 pm_arguments_t arguments = { 0 };
19059
19060 if (
19061 token_begins_expression_p(parser->current.type) ||
19062 match2(parser, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR)
19063 ) {
19064 pm_binding_power_t binding_power = pm_binding_powers[parser->current.type].left;
19065
19066 if (binding_power == PM_BINDING_POWER_UNSET || binding_power >= PM_BINDING_POWER_RANGE) {
19067 parse_arguments(parser, &arguments, false, PM_TOKEN_EOF, (uint16_t) (depth + 1));
19068 }
19069 }
19070
19071 switch (keyword.type) {
19073 pm_node_t *node = (pm_node_t *) pm_break_node_create(parser, &keyword, arguments.arguments);
19074 if (!parser->partial_script) parse_block_exit(parser, node);
19075 return node;
19076 }
19077 case PM_TOKEN_KEYWORD_NEXT: {
19078 pm_node_t *node = (pm_node_t *) pm_next_node_create(parser, &keyword, arguments.arguments);
19079 if (!parser->partial_script) parse_block_exit(parser, node);
19080 return node;
19081 }
19083 pm_node_t *node = (pm_node_t *) pm_return_node_create(parser, &keyword, arguments.arguments);
19084 parse_return(parser, node);
19085 return node;
19086 }
19087 default:
19088 assert(false && "unreachable");
19089 return (pm_node_t *) pm_missing_node_create(parser, parser->previous.start, parser->previous.end);
19090 }
19091 }
19093 parser_lex(parser);
19094
19095 pm_token_t keyword = parser->previous;
19096 pm_arguments_t arguments = { 0 };
19097 parse_arguments_list(parser, &arguments, true, accepts_command_call, (uint16_t) (depth + 1));
19098
19099 if (
19100 arguments.opening_loc.start == NULL &&
19101 arguments.arguments == NULL &&
19102 ((arguments.block == NULL) || PM_NODE_TYPE_P(arguments.block, PM_BLOCK_NODE))
19103 ) {
19104 return (pm_node_t *) pm_forwarding_super_node_create(parser, &keyword, &arguments);
19105 }
19106
19107 return (pm_node_t *) pm_super_node_create(parser, &keyword, &arguments);
19108 }
19110 parser_lex(parser);
19111
19112 pm_token_t keyword = parser->previous;
19113 pm_arguments_t arguments = { 0 };
19114 parse_arguments_list(parser, &arguments, false, accepts_command_call, (uint16_t) (depth + 1));
19115
19116 // It's possible that we've parsed a block argument through our
19117 // call to parse_arguments_list. If we found one, we should mark it
19118 // as invalid and destroy it, as we don't have a place for it on the
19119 // yield node.
19120 if (arguments.block != NULL) {
19121 pm_parser_err_node(parser, arguments.block, PM_ERR_UNEXPECTED_BLOCK_ARGUMENT);
19122 pm_node_destroy(parser, arguments.block);
19123 arguments.block = NULL;
19124 }
19125
19126 pm_node_t *node = (pm_node_t *) pm_yield_node_create(parser, &keyword, &arguments.opening_loc, arguments.arguments, &arguments.closing_loc);
19127 if (!parser->parsing_eval && !parser->partial_script) parse_yield(parser, node);
19128
19129 return node;
19130 }
19132 size_t opening_newline_index = token_newline_index(parser);
19133 parser_lex(parser);
19134
19135 pm_token_t class_keyword = parser->previous;
19136 pm_do_loop_stack_push(parser, false);
19137
19138 pm_node_list_t current_block_exits = { 0 };
19139 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
19140
19141 if (accept1(parser, PM_TOKEN_LESS_LESS)) {
19142 pm_token_t operator = parser->previous;
19143 pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_EXPECT_EXPRESSION_AFTER_LESS_LESS, (uint16_t) (depth + 1));
19144
19145 pm_parser_scope_push(parser, true);
19146 if (!match2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
19147 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_SINGLETON_CLASS_DELIMITER, pm_token_type_human(parser->current.type));
19148 }
19149
19150 pm_node_t *statements = NULL;
19152 pm_accepts_block_stack_push(parser, true);
19153 statements = (pm_node_t *) parse_statements(parser, PM_CONTEXT_SCLASS, (uint16_t) (depth + 1));
19154 pm_accepts_block_stack_pop(parser);
19155 }
19156
19157 if (match2(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
19158 assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
19159 statements = (pm_node_t *) parse_rescues_implicit_begin(parser, opening_newline_index, &class_keyword, class_keyword.start, (pm_statements_node_t *) statements, PM_RESCUES_SCLASS, (uint16_t) (depth + 1));
19160 } else {
19161 parser_warn_indentation_mismatch(parser, opening_newline_index, &class_keyword, false, false);
19162 }
19163
19164 expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CLASS_TERM);
19165
19166 pm_constant_id_list_t locals;
19167 pm_locals_order(parser, &parser->current_scope->locals, &locals, false);
19168
19169 pm_parser_scope_pop(parser);
19170 pm_do_loop_stack_pop(parser);
19171
19172 flush_block_exits(parser, previous_block_exits);
19173 pm_node_list_free(&current_block_exits);
19174
19175 return (pm_node_t *) pm_singleton_class_node_create(parser, &locals, &class_keyword, &operator, expression, statements, &parser->previous);
19176 }
19177
19178 pm_node_t *constant_path = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_CLASS_NAME, (uint16_t) (depth + 1));
19179 pm_token_t name = parser->previous;
19180 if (name.type != PM_TOKEN_CONSTANT) {
19181 pm_parser_err_token(parser, &name, PM_ERR_CLASS_NAME);
19182 }
19183
19184 pm_token_t inheritance_operator;
19185 pm_node_t *superclass;
19186
19187 if (match1(parser, PM_TOKEN_LESS)) {
19188 inheritance_operator = parser->current;
19189 lex_state_set(parser, PM_LEX_STATE_BEG);
19190
19191 parser->command_start = true;
19192 parser_lex(parser);
19193
19194 superclass = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_CLASS_SUPERCLASS, (uint16_t) (depth + 1));
19195 } else {
19196 inheritance_operator = not_provided(parser);
19197 superclass = NULL;
19198 }
19199
19200 pm_parser_scope_push(parser, true);
19201
19202 if (inheritance_operator.type != PM_TOKEN_NOT_PROVIDED) {
19203 expect2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_CLASS_UNEXPECTED_END);
19204 } else {
19205 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
19206 }
19207 pm_node_t *statements = NULL;
19208
19210 pm_accepts_block_stack_push(parser, true);
19211 statements = (pm_node_t *) parse_statements(parser, PM_CONTEXT_CLASS, (uint16_t) (depth + 1));
19212 pm_accepts_block_stack_pop(parser);
19213 }
19214
19215 if (match2(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
19216 assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
19217 statements = (pm_node_t *) parse_rescues_implicit_begin(parser, opening_newline_index, &class_keyword, class_keyword.start, (pm_statements_node_t *) statements, PM_RESCUES_CLASS, (uint16_t) (depth + 1));
19218 } else {
19219 parser_warn_indentation_mismatch(parser, opening_newline_index, &class_keyword, false, false);
19220 }
19221
19222 expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CLASS_TERM);
19223
19224 if (context_def_p(parser)) {
19225 pm_parser_err_token(parser, &class_keyword, PM_ERR_CLASS_IN_METHOD);
19226 }
19227
19228 pm_constant_id_list_t locals;
19229 pm_locals_order(parser, &parser->current_scope->locals, &locals, false);
19230
19231 pm_parser_scope_pop(parser);
19232 pm_do_loop_stack_pop(parser);
19233
19234 if (!PM_NODE_TYPE_P(constant_path, PM_CONSTANT_PATH_NODE) && !(PM_NODE_TYPE_P(constant_path, PM_CONSTANT_READ_NODE))) {
19235 pm_parser_err_node(parser, constant_path, PM_ERR_CLASS_NAME);
19236 }
19237
19238 pop_block_exits(parser, previous_block_exits);
19239 pm_node_list_free(&current_block_exits);
19240
19241 return (pm_node_t *) pm_class_node_create(parser, &locals, &class_keyword, constant_path, &name, &inheritance_operator, superclass, statements, &parser->previous);
19242 }
19243 case PM_TOKEN_KEYWORD_DEF: {
19244 pm_node_list_t current_block_exits = { 0 };
19245 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
19246
19247 pm_token_t def_keyword = parser->current;
19248 size_t opening_newline_index = token_newline_index(parser);
19249
19250 pm_node_t *receiver = NULL;
19251 pm_token_t operator = not_provided(parser);
19252 pm_token_t name;
19253
19254 // This context is necessary for lexing `...` in a bare params
19255 // correctly. It must be pushed before lexing the first param, so it
19256 // is here.
19257 context_push(parser, PM_CONTEXT_DEF_PARAMS);
19258 parser_lex(parser);
19259
19260 // This will be false if the method name is not a valid identifier
19261 // but could be followed by an operator.
19262 bool valid_name = true;
19263
19264 switch (parser->current.type) {
19265 case PM_CASE_OPERATOR:
19266 pm_parser_scope_push(parser, true);
19267 lex_state_set(parser, PM_LEX_STATE_ENDFN);
19268 parser_lex(parser);
19269
19270 name = parser->previous;
19271 break;
19272 case PM_TOKEN_IDENTIFIER: {
19273 parser_lex(parser);
19274
19275 if (match2(parser, PM_TOKEN_DOT, PM_TOKEN_COLON_COLON)) {
19276 receiver = parse_variable_call(parser);
19277
19278 pm_parser_scope_push(parser, true);
19279 lex_state_set(parser, PM_LEX_STATE_FNAME);
19280 parser_lex(parser);
19281
19282 operator = parser->previous;
19283 name = parse_method_definition_name(parser);
19284 } else {
19285 pm_refute_numbered_parameter(parser, parser->previous.start, parser->previous.end);
19286 pm_parser_scope_push(parser, true);
19287
19288 name = parser->previous;
19289 }
19290
19291 break;
19292 }
19296 valid_name = false;
19297 /* fallthrough */
19298 case PM_TOKEN_CONSTANT:
19306 pm_parser_scope_push(parser, true);
19307 parser_lex(parser);
19308
19309 pm_token_t identifier = parser->previous;
19310
19311 if (match2(parser, PM_TOKEN_DOT, PM_TOKEN_COLON_COLON)) {
19312 lex_state_set(parser, PM_LEX_STATE_FNAME);
19313 parser_lex(parser);
19314 operator = parser->previous;
19315
19316 switch (identifier.type) {
19317 case PM_TOKEN_CONSTANT:
19318 receiver = (pm_node_t *) pm_constant_read_node_create(parser, &identifier);
19319 break;
19321 receiver = (pm_node_t *) pm_instance_variable_read_node_create(parser, &identifier);
19322 break;
19324 receiver = (pm_node_t *) pm_class_variable_read_node_create(parser, &identifier);
19325 break;
19327 receiver = (pm_node_t *) pm_global_variable_read_node_create(parser, &identifier);
19328 break;
19330 receiver = (pm_node_t *) pm_nil_node_create(parser, &identifier);
19331 break;
19333 receiver = (pm_node_t *) pm_self_node_create(parser, &identifier);
19334 break;
19336 receiver = (pm_node_t *) pm_true_node_create(parser, &identifier);
19337 break;
19339 receiver = (pm_node_t *) pm_false_node_create(parser, &identifier);
19340 break;
19342 receiver = (pm_node_t *) pm_source_file_node_create(parser, &identifier);
19343 break;
19345 receiver = (pm_node_t *) pm_source_line_node_create(parser, &identifier);
19346 break;
19348 receiver = (pm_node_t *) pm_source_encoding_node_create(parser, &identifier);
19349 break;
19350 default:
19351 break;
19352 }
19353
19354 name = parse_method_definition_name(parser);
19355 } else {
19356 if (!valid_name) {
19357 PM_PARSER_ERR_TOKEN_FORMAT(parser, identifier, PM_ERR_DEF_NAME, pm_token_type_human(identifier.type));
19358 }
19359
19360 name = identifier;
19361 }
19362 break;
19363 }
19365 // The current context is `PM_CONTEXT_DEF_PARAMS`, however
19366 // the inner expression of this parenthesis should not be
19367 // processed under this context. Thus, the context is popped
19368 // here.
19369 context_pop(parser);
19370 parser_lex(parser);
19371
19372 pm_token_t lparen = parser->previous;
19373 pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_DEF_RECEIVER, (uint16_t) (depth + 1));
19374
19375 accept1(parser, PM_TOKEN_NEWLINE);
19376 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
19377 pm_token_t rparen = parser->previous;
19378
19379 lex_state_set(parser, PM_LEX_STATE_FNAME);
19380 expect2(parser, PM_TOKEN_DOT, PM_TOKEN_COLON_COLON, PM_ERR_DEF_RECEIVER_TERM);
19381
19382 operator = parser->previous;
19383 receiver = (pm_node_t *) pm_parentheses_node_create(parser, &lparen, expression, &rparen);
19384
19385 // `PM_CONTEXT_DEF_PARAMS` is pushed again here for the same
19386 // reason as described above.
19387 pm_parser_scope_push(parser, true);
19388 context_push(parser, PM_CONTEXT_DEF_PARAMS);
19389 name = parse_method_definition_name(parser);
19390 break;
19391 }
19392 default:
19393 pm_parser_scope_push(parser, true);
19394 name = parse_method_definition_name(parser);
19395 break;
19396 }
19397
19398 pm_token_t lparen;
19399 pm_token_t rparen;
19400 pm_parameters_node_t *params;
19401
19402 switch (parser->current.type) {
19404 parser_lex(parser);
19405 lparen = parser->previous;
19406
19407 if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
19408 params = NULL;
19409 } else {
19410 params = parse_parameters(parser, PM_BINDING_POWER_DEFINED, true, false, true, true, false, (uint16_t) (depth + 1));
19411 }
19412
19413 lex_state_set(parser, PM_LEX_STATE_BEG);
19414 parser->command_start = true;
19415
19416 context_pop(parser);
19417 if (!accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
19418 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_DEF_PARAMS_TERM_PAREN, pm_token_type_human(parser->current.type));
19419 parser->previous.start = parser->previous.end;
19420 parser->previous.type = PM_TOKEN_MISSING;
19421 }
19422
19423 rparen = parser->previous;
19424 break;
19425 }
19426 case PM_CASE_PARAMETER: {
19427 // If we're about to lex a label, we need to add the label
19428 // state to make sure the next newline is ignored.
19429 if (parser->current.type == PM_TOKEN_LABEL) {
19430 lex_state_set(parser, parser->lex_state | PM_LEX_STATE_LABEL);
19431 }
19432
19433 lparen = not_provided(parser);
19434 rparen = not_provided(parser);
19435 params = parse_parameters(parser, PM_BINDING_POWER_DEFINED, false, false, true, true, false, (uint16_t) (depth + 1));
19436
19437 context_pop(parser);
19438 break;
19439 }
19440 default: {
19441 lparen = not_provided(parser);
19442 rparen = not_provided(parser);
19443 params = NULL;
19444
19445 context_pop(parser);
19446 break;
19447 }
19448 }
19449
19450 pm_node_t *statements = NULL;
19451 pm_token_t equal;
19452 pm_token_t end_keyword;
19453
19454 if (accept1(parser, PM_TOKEN_EQUAL)) {
19455 if (token_is_setter_name(&name)) {
19456 pm_parser_err_token(parser, &name, PM_ERR_DEF_ENDLESS_SETTER);
19457 }
19458 equal = parser->previous;
19459
19460 context_push(parser, PM_CONTEXT_DEF);
19461 pm_do_loop_stack_push(parser, false);
19462 statements = (pm_node_t *) pm_statements_node_create(parser);
19463
19464 pm_node_t *statement = parse_expression(parser, PM_BINDING_POWER_DEFINED + 1, binding_power < PM_BINDING_POWER_COMPOSITION, false, PM_ERR_DEF_ENDLESS, (uint16_t) (depth + 1));
19465
19466 if (accept1(parser, PM_TOKEN_KEYWORD_RESCUE_MODIFIER)) {
19467 context_push(parser, PM_CONTEXT_RESCUE_MODIFIER);
19468
19469 pm_token_t rescue_keyword = parser->previous;
19470 pm_node_t *value = parse_expression(parser, binding_power, false, false, PM_ERR_RESCUE_MODIFIER_VALUE, (uint16_t) (depth + 1));
19471 context_pop(parser);
19472
19473 statement = (pm_node_t *) pm_rescue_modifier_node_create(parser, statement, &rescue_keyword, value);
19474 }
19475
19476 pm_statements_node_body_append(parser, (pm_statements_node_t *) statements, statement, false);
19477 pm_do_loop_stack_pop(parser);
19478 context_pop(parser);
19479 end_keyword = not_provided(parser);
19480 } else {
19481 equal = not_provided(parser);
19482
19483 if (lparen.type == PM_TOKEN_NOT_PROVIDED) {
19484 lex_state_set(parser, PM_LEX_STATE_BEG);
19485 parser->command_start = true;
19486 expect2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_DEF_PARAMS_TERM);
19487 } else {
19488 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
19489 }
19490
19491 pm_accepts_block_stack_push(parser, true);
19492 pm_do_loop_stack_push(parser, false);
19493
19495 pm_accepts_block_stack_push(parser, true);
19496 statements = (pm_node_t *) parse_statements(parser, PM_CONTEXT_DEF, (uint16_t) (depth + 1));
19497 pm_accepts_block_stack_pop(parser);
19498 }
19499
19501 assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
19502 statements = (pm_node_t *) parse_rescues_implicit_begin(parser, opening_newline_index, &def_keyword, def_keyword.start, (pm_statements_node_t *) statements, PM_RESCUES_DEF, (uint16_t) (depth + 1));
19503 } else {
19504 parser_warn_indentation_mismatch(parser, opening_newline_index, &def_keyword, false, false);
19505 }
19506
19507 pm_accepts_block_stack_pop(parser);
19508 pm_do_loop_stack_pop(parser);
19509
19510 expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_DEF_TERM);
19511 end_keyword = parser->previous;
19512 }
19513
19514 pm_constant_id_list_t locals;
19515 pm_locals_order(parser, &parser->current_scope->locals, &locals, false);
19516 pm_parser_scope_pop(parser);
19517
19523 pm_constant_id_t name_id = pm_parser_constant_id_location(parser, name.start, parse_operator_symbol_name(&name));
19524
19525 flush_block_exits(parser, previous_block_exits);
19526 pm_node_list_free(&current_block_exits);
19527
19528 return (pm_node_t *) pm_def_node_create(
19529 parser,
19530 name_id,
19531 &name,
19532 receiver,
19533 params,
19534 statements,
19535 &locals,
19536 &def_keyword,
19537 &operator,
19538 &lparen,
19539 &rparen,
19540 &equal,
19541 &end_keyword
19542 );
19543 }
19545 parser_lex(parser);
19546 pm_token_t keyword = parser->previous;
19547
19548 pm_token_t lparen;
19549 pm_token_t rparen;
19550 pm_node_t *expression;
19551 context_push(parser, PM_CONTEXT_DEFINED);
19552
19553 if (accept1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
19554 lparen = parser->previous;
19555 expression = parse_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_DEFINED_EXPRESSION, (uint16_t) (depth + 1));
19556
19557 if (parser->recovering) {
19558 rparen = not_provided(parser);
19559 } else {
19560 accept1(parser, PM_TOKEN_NEWLINE);
19561 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
19562 rparen = parser->previous;
19563 }
19564 } else {
19565 lparen = not_provided(parser);
19566 rparen = not_provided(parser);
19567 expression = parse_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_DEFINED_EXPRESSION, (uint16_t) (depth + 1));
19568 }
19569
19570 context_pop(parser);
19571 return (pm_node_t *) pm_defined_node_create(
19572 parser,
19573 &lparen,
19574 expression,
19575 &rparen,
19576 &PM_LOCATION_TOKEN_VALUE(&keyword)
19577 );
19578 }
19580 if (binding_power != PM_BINDING_POWER_STATEMENT) {
19581 pm_parser_err_current(parser, PM_ERR_STATEMENT_POSTEXE_END);
19582 }
19583
19584 parser_lex(parser);
19585 pm_token_t keyword = parser->previous;
19586
19587 if (context_def_p(parser)) {
19588 pm_parser_warn_token(parser, &keyword, PM_WARN_END_IN_METHOD);
19589 }
19590
19591 expect1(parser, PM_TOKEN_BRACE_LEFT, PM_ERR_END_UPCASE_BRACE);
19592 pm_token_t opening = parser->previous;
19593 pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_POSTEXE, (uint16_t) (depth + 1));
19594
19595 expect1(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_END_UPCASE_TERM);
19596 return (pm_node_t *) pm_post_execution_node_create(parser, &keyword, &opening, statements, &parser->previous);
19597 }
19599 parser_lex(parser);
19600 return (pm_node_t *) pm_false_node_create(parser, &parser->previous);
19601 case PM_TOKEN_KEYWORD_FOR: {
19602 size_t opening_newline_index = token_newline_index(parser);
19603 parser_lex(parser);
19604
19605 pm_token_t for_keyword = parser->previous;
19606 pm_node_t *index;
19607
19608 context_push(parser, PM_CONTEXT_FOR_INDEX);
19609
19610 // First, parse out the first index expression.
19611 if (accept1(parser, PM_TOKEN_USTAR)) {
19612 pm_token_t star_operator = parser->previous;
19613 pm_node_t *name = NULL;
19614
19615 if (token_begins_expression_p(parser->current.type)) {
19616 name = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
19617 }
19618
19619 index = (pm_node_t *) pm_splat_node_create(parser, &star_operator, name);
19620 } else if (token_begins_expression_p(parser->current.type)) {
19621 index = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_COMMA, (uint16_t) (depth + 1));
19622 } else {
19623 pm_parser_err_token(parser, &for_keyword, PM_ERR_FOR_INDEX);
19624 index = (pm_node_t *) pm_missing_node_create(parser, for_keyword.start, for_keyword.end);
19625 }
19626
19627 // Now, if there are multiple index expressions, parse them out.
19628 if (match1(parser, PM_TOKEN_COMMA)) {
19629 index = parse_targets(parser, index, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
19630 } else {
19631 index = parse_target(parser, index, false, false);
19632 }
19633
19634 context_pop(parser);
19635 pm_do_loop_stack_push(parser, true);
19636
19637 expect1(parser, PM_TOKEN_KEYWORD_IN, PM_ERR_FOR_IN);
19638 pm_token_t in_keyword = parser->previous;
19639
19640 pm_node_t *collection = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_FOR_COLLECTION, (uint16_t) (depth + 1));
19641 pm_do_loop_stack_pop(parser);
19642
19643 pm_token_t do_keyword;
19644 if (accept1(parser, PM_TOKEN_KEYWORD_DO_LOOP)) {
19645 do_keyword = parser->previous;
19646 } else {
19647 do_keyword = not_provided(parser);
19648 if (!match2(parser, PM_TOKEN_SEMICOLON, PM_TOKEN_NEWLINE)) {
19649 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_FOR_DELIMITER, pm_token_type_human(parser->current.type));
19650 }
19651 }
19652
19653 pm_statements_node_t *statements = NULL;
19654 if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
19655 statements = parse_statements(parser, PM_CONTEXT_FOR, (uint16_t) (depth + 1));
19656 }
19657
19658 parser_warn_indentation_mismatch(parser, opening_newline_index, &for_keyword, false, false);
19659 expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_FOR_TERM);
19660
19661 return (pm_node_t *) pm_for_node_create(parser, index, collection, statements, &for_keyword, &in_keyword, &do_keyword, &parser->previous);
19662 }
19664 if (parser_end_of_line_p(parser)) {
19665 PM_PARSER_WARN_TOKEN_FORMAT_CONTENT(parser, parser->current, PM_WARN_KEYWORD_EOL);
19666 }
19667
19668 size_t opening_newline_index = token_newline_index(parser);
19669 bool if_after_else = parser->previous.type == PM_TOKEN_KEYWORD_ELSE;
19670 parser_lex(parser);
19671
19672 return parse_conditional(parser, PM_CONTEXT_IF, opening_newline_index, if_after_else, (uint16_t) (depth + 1));
19674 if (binding_power != PM_BINDING_POWER_STATEMENT) {
19675 pm_parser_err_current(parser, PM_ERR_STATEMENT_UNDEF);
19676 }
19677
19678 parser_lex(parser);
19679 pm_undef_node_t *undef = pm_undef_node_create(parser, &parser->previous);
19680 pm_node_t *name = parse_undef_argument(parser, (uint16_t) (depth + 1));
19681
19682 if (PM_NODE_TYPE_P(name, PM_MISSING_NODE)) {
19683 pm_node_destroy(parser, name);
19684 } else {
19685 pm_undef_node_append(undef, name);
19686
19687 while (match1(parser, PM_TOKEN_COMMA)) {
19688 lex_state_set(parser, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM);
19689 parser_lex(parser);
19690 name = parse_undef_argument(parser, (uint16_t) (depth + 1));
19691
19692 if (PM_NODE_TYPE_P(name, PM_MISSING_NODE)) {
19693 pm_node_destroy(parser, name);
19694 break;
19695 }
19696
19697 pm_undef_node_append(undef, name);
19698 }
19699 }
19700
19701 return (pm_node_t *) undef;
19702 }
19703 case PM_TOKEN_KEYWORD_NOT: {
19704 parser_lex(parser);
19705
19706 pm_token_t message = parser->previous;
19707 pm_arguments_t arguments = { 0 };
19708 pm_node_t *receiver = NULL;
19709
19710 accept1(parser, PM_TOKEN_NEWLINE);
19711
19712 if (accept1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
19713 arguments.opening_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
19714
19715 if (accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
19716 arguments.closing_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
19717 } else {
19718 receiver = parse_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_NOT_EXPRESSION, (uint16_t) (depth + 1));
19719
19720 if (!parser->recovering) {
19721 accept1(parser, PM_TOKEN_NEWLINE);
19722 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
19723 arguments.closing_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
19724 }
19725 }
19726 } else {
19727 receiver = parse_expression(parser, PM_BINDING_POWER_NOT, true, false, PM_ERR_NOT_EXPRESSION, (uint16_t) (depth + 1));
19728 }
19729
19730 return (pm_node_t *) pm_call_node_not_create(parser, receiver, &message, &arguments);
19731 }
19733 size_t opening_newline_index = token_newline_index(parser);
19734 parser_lex(parser);
19735
19736 return parse_conditional(parser, PM_CONTEXT_UNLESS, opening_newline_index, false, (uint16_t) (depth + 1));
19737 }
19739 pm_node_list_t current_block_exits = { 0 };
19740 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
19741
19742 size_t opening_newline_index = token_newline_index(parser);
19743 parser_lex(parser);
19744 pm_token_t module_keyword = parser->previous;
19745
19746 pm_node_t *constant_path = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_MODULE_NAME, (uint16_t) (depth + 1));
19747 pm_token_t name;
19748
19749 // If we can recover from a syntax error that occurred while parsing
19750 // the name of the module, then we'll handle that here.
19751 if (PM_NODE_TYPE_P(constant_path, PM_MISSING_NODE)) {
19752 pop_block_exits(parser, previous_block_exits);
19753 pm_node_list_free(&current_block_exits);
19754
19755 pm_token_t missing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
19756 return (pm_node_t *) pm_module_node_create(parser, NULL, &module_keyword, constant_path, &missing, NULL, &missing);
19757 }
19758
19759 while (accept1(parser, PM_TOKEN_COLON_COLON)) {
19760 pm_token_t double_colon = parser->previous;
19761
19762 expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
19763 constant_path = (pm_node_t *) pm_constant_path_node_create(parser, constant_path, &double_colon, &parser->previous);
19764 }
19765
19766 // Here we retrieve the name of the module. If it wasn't a constant,
19767 // then it's possible that `module foo` was passed, which is a
19768 // syntax error. We handle that here as well.
19769 name = parser->previous;
19770 if (name.type != PM_TOKEN_CONSTANT) {
19771 pm_parser_err_token(parser, &name, PM_ERR_MODULE_NAME);
19772 }
19773
19774 pm_parser_scope_push(parser, true);
19775 accept2(parser, PM_TOKEN_SEMICOLON, PM_TOKEN_NEWLINE);
19776 pm_node_t *statements = NULL;
19777
19779 pm_accepts_block_stack_push(parser, true);
19780 statements = (pm_node_t *) parse_statements(parser, PM_CONTEXT_MODULE, (uint16_t) (depth + 1));
19781 pm_accepts_block_stack_pop(parser);
19782 }
19783
19785 assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
19786 statements = (pm_node_t *) parse_rescues_implicit_begin(parser, opening_newline_index, &module_keyword, module_keyword.start, (pm_statements_node_t *) statements, PM_RESCUES_MODULE, (uint16_t) (depth + 1));
19787 } else {
19788 parser_warn_indentation_mismatch(parser, opening_newline_index, &module_keyword, false, false);
19789 }
19790
19791 pm_constant_id_list_t locals;
19792 pm_locals_order(parser, &parser->current_scope->locals, &locals, false);
19793
19794 pm_parser_scope_pop(parser);
19795 expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_MODULE_TERM);
19796
19797 if (context_def_p(parser)) {
19798 pm_parser_err_token(parser, &module_keyword, PM_ERR_MODULE_IN_METHOD);
19799 }
19800
19801 pop_block_exits(parser, previous_block_exits);
19802 pm_node_list_free(&current_block_exits);
19803
19804 return (pm_node_t *) pm_module_node_create(parser, &locals, &module_keyword, constant_path, &name, statements, &parser->previous);
19805 }
19807 parser_lex(parser);
19808 return (pm_node_t *) pm_nil_node_create(parser, &parser->previous);
19809 case PM_TOKEN_KEYWORD_REDO: {
19810 parser_lex(parser);
19811
19812 pm_node_t *node = (pm_node_t *) pm_redo_node_create(parser, &parser->previous);
19813 if (!parser->partial_script) parse_block_exit(parser, node);
19814
19815 return node;
19816 }
19818 parser_lex(parser);
19819
19820 pm_node_t *node = (pm_node_t *) pm_retry_node_create(parser, &parser->previous);
19821 parse_retry(parser, node);
19822
19823 return node;
19824 }
19826 parser_lex(parser);
19827 return (pm_node_t *) pm_self_node_create(parser, &parser->previous);
19829 parser_lex(parser);
19830 return (pm_node_t *) pm_true_node_create(parser, &parser->previous);
19832 size_t opening_newline_index = token_newline_index(parser);
19833
19834 context_push(parser, PM_CONTEXT_LOOP_PREDICATE);
19835 pm_do_loop_stack_push(parser, true);
19836
19837 parser_lex(parser);
19838 pm_token_t keyword = parser->previous;
19839 pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_CONDITIONAL_UNTIL_PREDICATE, (uint16_t) (depth + 1));
19840
19841 pm_do_loop_stack_pop(parser);
19842 context_pop(parser);
19843
19844 pm_token_t do_keyword;
19845 if (accept1(parser, PM_TOKEN_KEYWORD_DO_LOOP)) {
19846 do_keyword = parser->previous;
19847 } else {
19848 do_keyword = not_provided(parser);
19849 expect2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_CONDITIONAL_UNTIL_PREDICATE);
19850 }
19851
19852 pm_statements_node_t *statements = NULL;
19853 if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
19854 pm_accepts_block_stack_push(parser, true);
19855 statements = parse_statements(parser, PM_CONTEXT_UNTIL, (uint16_t) (depth + 1));
19856 pm_accepts_block_stack_pop(parser);
19857 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
19858 }
19859
19860 parser_warn_indentation_mismatch(parser, opening_newline_index, &keyword, false, false);
19861 expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_UNTIL_TERM);
19862
19863 return (pm_node_t *) pm_until_node_create(parser, &keyword, &do_keyword, &parser->previous, predicate, statements, 0);
19864 }
19866 size_t opening_newline_index = token_newline_index(parser);
19867
19868 context_push(parser, PM_CONTEXT_LOOP_PREDICATE);
19869 pm_do_loop_stack_push(parser, true);
19870
19871 parser_lex(parser);
19872 pm_token_t keyword = parser->previous;
19873 pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_CONDITIONAL_WHILE_PREDICATE, (uint16_t) (depth + 1));
19874
19875 pm_do_loop_stack_pop(parser);
19876 context_pop(parser);
19877
19878 pm_token_t do_keyword;
19879 if (accept1(parser, PM_TOKEN_KEYWORD_DO_LOOP)) {
19880 do_keyword = parser->previous;
19881 } else {
19882 do_keyword = not_provided(parser);
19883 expect2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_CONDITIONAL_WHILE_PREDICATE);
19884 }
19885
19886 pm_statements_node_t *statements = NULL;
19887 if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
19888 pm_accepts_block_stack_push(parser, true);
19889 statements = parse_statements(parser, PM_CONTEXT_WHILE, (uint16_t) (depth + 1));
19890 pm_accepts_block_stack_pop(parser);
19891 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
19892 }
19893
19894 parser_warn_indentation_mismatch(parser, opening_newline_index, &keyword, false, false);
19895 expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_WHILE_TERM);
19896
19897 return (pm_node_t *) pm_while_node_create(parser, &keyword, &do_keyword, &parser->previous, predicate, statements, 0);
19898 }
19900 parser_lex(parser);
19901 pm_token_t opening = parser->previous;
19902 pm_array_node_t *array = pm_array_node_create(parser, &opening);
19903
19904 while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
19905 accept1(parser, PM_TOKEN_WORDS_SEP);
19906 if (match1(parser, PM_TOKEN_STRING_END)) break;
19907
19908 if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
19909 pm_token_t opening = not_provided(parser);
19910 pm_token_t closing = not_provided(parser);
19911 pm_array_node_elements_append(array, (pm_node_t *) pm_symbol_node_create_current_string(parser, &opening, &parser->current, &closing));
19912 }
19913
19914 expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_I_LOWER_ELEMENT);
19915 }
19916
19917 pm_token_t closing = parser->current;
19918 if (match1(parser, PM_TOKEN_EOF)) {
19919 pm_parser_err_token(parser, &opening, PM_ERR_LIST_I_LOWER_TERM);
19920 closing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
19921 } else {
19922 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_I_LOWER_TERM);
19923 }
19924 pm_array_node_close_set(array, &closing);
19925
19926 return (pm_node_t *) array;
19927 }
19929 parser_lex(parser);
19930 pm_token_t opening = parser->previous;
19931 pm_array_node_t *array = pm_array_node_create(parser, &opening);
19932
19933 // This is the current node that we are parsing that will be added to the
19934 // list of elements.
19935 pm_node_t *current = NULL;
19936
19937 while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
19938 switch (parser->current.type) {
19939 case PM_TOKEN_WORDS_SEP: {
19940 if (current == NULL) {
19941 // If we hit a separator before we have any content, then we don't
19942 // need to do anything.
19943 } else {
19944 // If we hit a separator after we've hit content, then we need to
19945 // append that content to the list and reset the current node.
19946 pm_array_node_elements_append(array, current);
19947 current = NULL;
19948 }
19949
19950 parser_lex(parser);
19951 break;
19952 }
19954 pm_token_t opening = not_provided(parser);
19955 pm_token_t closing = not_provided(parser);
19956
19957 if (current == NULL) {
19958 // If we hit content and the current node is NULL, then this is
19959 // the first string content we've seen. In that case we're going
19960 // to create a new symbol node and set that to the current.
19961 current = (pm_node_t *) pm_symbol_node_create_current_string(parser, &opening, &parser->current, &closing);
19962 parser_lex(parser);
19963 } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_SYMBOL_NODE)) {
19964 // If we hit string content and the current node is an
19965 // interpolated symbol, then we need to append the string content
19966 // to the list of child nodes.
19967 pm_node_t *string = (pm_node_t *) pm_string_node_create_current_string(parser, &opening, &parser->current, &closing);
19968 parser_lex(parser);
19969
19970 pm_interpolated_symbol_node_append((pm_interpolated_symbol_node_t *) current, string);
19971 } else if (PM_NODE_TYPE_P(current, PM_SYMBOL_NODE)) {
19972 // If we hit string content and the current node is a symbol node,
19973 // then we need to convert the current node into an interpolated
19974 // symbol and add the string content to the list of child nodes.
19975 pm_symbol_node_t *cast = (pm_symbol_node_t *) current;
19976 pm_token_t bounds = not_provided(parser);
19977
19978 pm_token_t content = { .type = PM_TOKEN_STRING_CONTENT, .start = cast->value_loc.start, .end = cast->value_loc.end };
19979 pm_node_t *first_string = (pm_node_t *) pm_string_node_create_unescaped(parser, &bounds, &content, &bounds, &cast->unescaped);
19980 pm_node_t *second_string = (pm_node_t *) pm_string_node_create_current_string(parser, &opening, &parser->previous, &closing);
19981 parser_lex(parser);
19982
19983 pm_interpolated_symbol_node_t *interpolated = pm_interpolated_symbol_node_create(parser, &opening, NULL, &closing);
19984 pm_interpolated_symbol_node_append(interpolated, first_string);
19985 pm_interpolated_symbol_node_append(interpolated, second_string);
19986
19987 xfree(current);
19988 current = (pm_node_t *) interpolated;
19989 } else {
19990 assert(false && "unreachable");
19991 }
19992
19993 break;
19994 }
19995 case PM_TOKEN_EMBVAR: {
19996 bool start_location_set = false;
19997 if (current == NULL) {
19998 // If we hit an embedded variable and the current node is NULL,
19999 // then this is the start of a new element. We'll set the current
20000 // node to a new interpolated symbol.
20001 pm_token_t opening = not_provided(parser);
20002 pm_token_t closing = not_provided(parser);
20003 current = (pm_node_t *) pm_interpolated_symbol_node_create(parser, &opening, NULL, &closing);
20004 } else if (PM_NODE_TYPE_P(current, PM_SYMBOL_NODE)) {
20005 // If we hit an embedded variable and the current node is a symbol
20006 // node, then we'll convert the current into an interpolated
20007 // symbol and add the converted string node to the list of parts.
20008 pm_token_t opening = not_provided(parser);
20009 pm_token_t closing = not_provided(parser);
20010 pm_interpolated_symbol_node_t *interpolated = pm_interpolated_symbol_node_create(parser, &opening, NULL, &closing);
20011
20012 current = (pm_node_t *) pm_symbol_node_to_string_node(parser, (pm_symbol_node_t *) current);
20013 pm_interpolated_symbol_node_append(interpolated, current);
20014 interpolated->base.location.start = current->location.start;
20015 start_location_set = true;
20016 current = (pm_node_t *) interpolated;
20017 } else {
20018 // If we hit an embedded variable and the current node is an
20019 // interpolated symbol, then we'll just add the embedded variable.
20020 }
20021
20022 pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1));
20023 pm_interpolated_symbol_node_append((pm_interpolated_symbol_node_t *) current, part);
20024 if (!start_location_set) {
20025 current->location.start = part->location.start;
20026 }
20027 break;
20028 }
20030 bool start_location_set = false;
20031 if (current == NULL) {
20032 // If we hit an embedded expression and the current node is NULL,
20033 // then this is the start of a new element. We'll set the current
20034 // node to a new interpolated symbol.
20035 pm_token_t opening = not_provided(parser);
20036 pm_token_t closing = not_provided(parser);
20037 current = (pm_node_t *) pm_interpolated_symbol_node_create(parser, &opening, NULL, &closing);
20038 } else if (PM_NODE_TYPE_P(current, PM_SYMBOL_NODE)) {
20039 // If we hit an embedded expression and the current node is a
20040 // symbol node, then we'll convert the current into an
20041 // interpolated symbol and add the converted string node to the
20042 // list of parts.
20043 pm_token_t opening = not_provided(parser);
20044 pm_token_t closing = not_provided(parser);
20045 pm_interpolated_symbol_node_t *interpolated = pm_interpolated_symbol_node_create(parser, &opening, NULL, &closing);
20046
20047 current = (pm_node_t *) pm_symbol_node_to_string_node(parser, (pm_symbol_node_t *) current);
20048 pm_interpolated_symbol_node_append(interpolated, current);
20049 interpolated->base.location.start = current->location.start;
20050 start_location_set = true;
20051 current = (pm_node_t *) interpolated;
20052 } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_SYMBOL_NODE)) {
20053 // If we hit an embedded expression and the current node is an
20054 // interpolated symbol, then we'll just continue on.
20055 } else {
20056 assert(false && "unreachable");
20057 }
20058
20059 pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1));
20060 pm_interpolated_symbol_node_append((pm_interpolated_symbol_node_t *) current, part);
20061 if (!start_location_set) {
20062 current->location.start = part->location.start;
20063 }
20064 break;
20065 }
20066 default:
20067 expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_I_UPPER_ELEMENT);
20068 parser_lex(parser);
20069 break;
20070 }
20071 }
20072
20073 // If we have a current node, then we need to append it to the list.
20074 if (current) {
20075 pm_array_node_elements_append(array, current);
20076 }
20077
20078 pm_token_t closing = parser->current;
20079 if (match1(parser, PM_TOKEN_EOF)) {
20080 pm_parser_err_token(parser, &opening, PM_ERR_LIST_I_UPPER_TERM);
20081 closing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
20082 } else {
20083 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_I_UPPER_TERM);
20084 }
20085 pm_array_node_close_set(array, &closing);
20086
20087 return (pm_node_t *) array;
20088 }
20090 parser_lex(parser);
20091 pm_token_t opening = parser->previous;
20092 pm_array_node_t *array = pm_array_node_create(parser, &opening);
20093
20094 // Skip all leading whitespace.
20095 accept1(parser, PM_TOKEN_WORDS_SEP);
20096
20097 while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
20098 accept1(parser, PM_TOKEN_WORDS_SEP);
20099 if (match1(parser, PM_TOKEN_STRING_END)) break;
20100
20101 if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
20102 pm_token_t opening = not_provided(parser);
20103 pm_token_t closing = not_provided(parser);
20104
20105 pm_node_t *string = (pm_node_t *) pm_string_node_create_current_string(parser, &opening, &parser->current, &closing);
20106 pm_array_node_elements_append(array, string);
20107 }
20108
20109 expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_W_LOWER_ELEMENT);
20110 }
20111
20112 pm_token_t closing = parser->current;
20113 if (match1(parser, PM_TOKEN_EOF)) {
20114 pm_parser_err_token(parser, &opening, PM_ERR_LIST_W_LOWER_TERM);
20115 closing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
20116 } else {
20117 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_W_LOWER_TERM);
20118 }
20119
20120 pm_array_node_close_set(array, &closing);
20121 return (pm_node_t *) array;
20122 }
20124 parser_lex(parser);
20125 pm_token_t opening = parser->previous;
20126 pm_array_node_t *array = pm_array_node_create(parser, &opening);
20127
20128 // This is the current node that we are parsing that will be added
20129 // to the list of elements.
20130 pm_node_t *current = NULL;
20131
20132 while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
20133 switch (parser->current.type) {
20134 case PM_TOKEN_WORDS_SEP: {
20135 // Reset the explicit encoding if we hit a separator
20136 // since each element can have its own encoding.
20137 parser->explicit_encoding = NULL;
20138
20139 if (current == NULL) {
20140 // If we hit a separator before we have any content,
20141 // then we don't need to do anything.
20142 } else {
20143 // If we hit a separator after we've hit content,
20144 // then we need to append that content to the list
20145 // and reset the current node.
20146 pm_array_node_elements_append(array, current);
20147 current = NULL;
20148 }
20149
20150 parser_lex(parser);
20151 break;
20152 }
20154 pm_token_t opening = not_provided(parser);
20155 pm_token_t closing = not_provided(parser);
20156
20157 pm_node_t *string = (pm_node_t *) pm_string_node_create_current_string(parser, &opening, &parser->current, &closing);
20158 pm_node_flag_set(string, parse_unescaped_encoding(parser));
20159 parser_lex(parser);
20160
20161 if (current == NULL) {
20162 // If we hit content and the current node is NULL,
20163 // then this is the first string content we've seen.
20164 // In that case we're going to create a new string
20165 // node and set that to the current.
20166 current = string;
20167 } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_STRING_NODE)) {
20168 // If we hit string content and the current node is
20169 // an interpolated string, then we need to append
20170 // the string content to the list of child nodes.
20171 pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, string);
20172 } else if (PM_NODE_TYPE_P(current, PM_STRING_NODE)) {
20173 // If we hit string content and the current node is
20174 // a string node, then we need to convert the
20175 // current node into an interpolated string and add
20176 // the string content to the list of child nodes.
20177 pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, &opening, NULL, &closing);
20178 pm_interpolated_string_node_append(interpolated, current);
20179 pm_interpolated_string_node_append(interpolated, string);
20180 current = (pm_node_t *) interpolated;
20181 } else {
20182 assert(false && "unreachable");
20183 }
20184
20185 break;
20186 }
20187 case PM_TOKEN_EMBVAR: {
20188 if (current == NULL) {
20189 // If we hit an embedded variable and the current
20190 // node is NULL, then this is the start of a new
20191 // string. We'll set the current node to a new
20192 // interpolated string.
20193 pm_token_t opening = not_provided(parser);
20194 pm_token_t closing = not_provided(parser);
20195 current = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, NULL, &closing);
20196 } else if (PM_NODE_TYPE_P(current, PM_STRING_NODE)) {
20197 // If we hit an embedded variable and the current
20198 // node is a string node, then we'll convert the
20199 // current into an interpolated string and add the
20200 // string node to the list of parts.
20201 pm_token_t opening = not_provided(parser);
20202 pm_token_t closing = not_provided(parser);
20203 pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, &opening, NULL, &closing);
20204 pm_interpolated_string_node_append(interpolated, current);
20205 current = (pm_node_t *) interpolated;
20206 } else {
20207 // If we hit an embedded variable and the current
20208 // node is an interpolated string, then we'll just
20209 // add the embedded variable.
20210 }
20211
20212 pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1));
20213 pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, part);
20214 break;
20215 }
20217 if (current == NULL) {
20218 // If we hit an embedded expression and the current
20219 // node is NULL, then this is the start of a new
20220 // string. We'll set the current node to a new
20221 // interpolated string.
20222 pm_token_t opening = not_provided(parser);
20223 pm_token_t closing = not_provided(parser);
20224 current = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, NULL, &closing);
20225 } else if (PM_NODE_TYPE_P(current, PM_STRING_NODE)) {
20226 // If we hit an embedded expression and the current
20227 // node is a string node, then we'll convert the
20228 // current into an interpolated string and add the
20229 // string node to the list of parts.
20230 pm_token_t opening = not_provided(parser);
20231 pm_token_t closing = not_provided(parser);
20232 pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, &opening, NULL, &closing);
20233 pm_interpolated_string_node_append(interpolated, current);
20234 current = (pm_node_t *) interpolated;
20235 } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_STRING_NODE)) {
20236 // If we hit an embedded expression and the current
20237 // node is an interpolated string, then we'll just
20238 // continue on.
20239 } else {
20240 assert(false && "unreachable");
20241 }
20242
20243 pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1));
20244 pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, part);
20245 break;
20246 }
20247 default:
20248 expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_W_UPPER_ELEMENT);
20249 parser_lex(parser);
20250 break;
20251 }
20252 }
20253
20254 // If we have a current node, then we need to append it to the list.
20255 if (current) {
20256 pm_array_node_elements_append(array, current);
20257 }
20258
20259 pm_token_t closing = parser->current;
20260 if (match1(parser, PM_TOKEN_EOF)) {
20261 pm_parser_err_token(parser, &opening, PM_ERR_LIST_W_UPPER_TERM);
20262 closing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
20263 } else {
20264 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_W_UPPER_TERM);
20265 }
20266
20267 pm_array_node_close_set(array, &closing);
20268 return (pm_node_t *) array;
20269 }
20270 case PM_TOKEN_REGEXP_BEGIN: {
20271 pm_token_t opening = parser->current;
20272 parser_lex(parser);
20273
20274 if (match1(parser, PM_TOKEN_REGEXP_END)) {
20275 // If we get here, then we have an end immediately after a start. In
20276 // that case we'll create an empty content token and return an
20277 // uninterpolated regular expression.
20278 pm_token_t content = (pm_token_t) {
20280 .start = parser->previous.end,
20281 .end = parser->previous.end
20282 };
20283
20284 parser_lex(parser);
20285
20286 pm_node_t *node = (pm_node_t *) pm_regular_expression_node_create(parser, &opening, &content, &parser->previous);
20288
20289 return node;
20290 }
20291
20293
20294 if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
20295 // In this case we've hit string content so we know the regular
20296 // expression at least has something in it. We'll need to check if the
20297 // following token is the end (in which case we can return a plain
20298 // regular expression) or if it's not then it has interpolation.
20299 pm_string_t unescaped = parser->current_string;
20300 pm_token_t content = parser->current;
20301 bool ascii_only = parser->current_regular_expression_ascii_only;
20302 parser_lex(parser);
20303
20304 // If we hit an end, then we can create a regular expression
20305 // node without interpolation, which can be represented more
20306 // succinctly and more easily compiled.
20307 if (accept1(parser, PM_TOKEN_REGEXP_END)) {
20308 pm_regular_expression_node_t *node = (pm_regular_expression_node_t *) pm_regular_expression_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
20309
20310 // If we're not immediately followed by a =~, then we want
20311 // to parse all of the errors at this point. If it is
20312 // followed by a =~, then it will get parsed higher up while
20313 // parsing the named captures as well.
20314 if (!match1(parser, PM_TOKEN_EQUAL_TILDE)) {
20315 parse_regular_expression_errors(parser, node);
20316 }
20317
20318 pm_node_flag_set((pm_node_t *) node, parse_and_validate_regular_expression_encoding(parser, &unescaped, ascii_only, node->base.flags));
20319 return (pm_node_t *) node;
20320 }
20321
20322 // If we get here, then we have interpolation so we'll need to create
20323 // a regular expression node with interpolation.
20324 interpolated = pm_interpolated_regular_expression_node_create(parser, &opening);
20325
20326 pm_token_t opening = not_provided(parser);
20327 pm_token_t closing = not_provided(parser);
20328 pm_node_t *part = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &parser->previous, &closing, &unescaped);
20329
20330 if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) {
20331 // This is extremely strange, but the first string part of a
20332 // regular expression will always be tagged as binary if we
20333 // are in a US-ASCII file, no matter its contents.
20334 pm_node_flag_set(part, PM_STRING_FLAGS_FORCED_BINARY_ENCODING);
20335 }
20336
20337 pm_interpolated_regular_expression_node_append(interpolated, part);
20338 } else {
20339 // If the first part of the body of the regular expression is not a
20340 // string content, then we have interpolation and we need to create an
20341 // interpolated regular expression node.
20342 interpolated = pm_interpolated_regular_expression_node_create(parser, &opening);
20343 }
20344
20345 // Now that we're here and we have interpolation, we'll parse all of the
20346 // parts into the list.
20347 pm_node_t *part;
20348 while (!match2(parser, PM_TOKEN_REGEXP_END, PM_TOKEN_EOF)) {
20349 if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) {
20350 pm_interpolated_regular_expression_node_append(interpolated, part);
20351 }
20352 }
20353
20354 pm_token_t closing = parser->current;
20355 if (match1(parser, PM_TOKEN_EOF)) {
20356 pm_parser_err_token(parser, &opening, PM_ERR_REGEXP_TERM);
20357 closing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
20358 } else {
20359 expect1(parser, PM_TOKEN_REGEXP_END, PM_ERR_REGEXP_TERM);
20360 }
20361
20362 pm_interpolated_regular_expression_node_closing_set(parser, interpolated, &closing);
20363 return (pm_node_t *) interpolated;
20364 }
20365 case PM_TOKEN_BACKTICK:
20367 parser_lex(parser);
20368 pm_token_t opening = parser->previous;
20369
20370 // When we get here, we don't know if this string is going to have
20371 // interpolation or not, even though it is allowed. Still, we want to be
20372 // able to return a string node without interpolation if we can since
20373 // it'll be faster.
20374 if (match1(parser, PM_TOKEN_STRING_END)) {
20375 // If we get here, then we have an end immediately after a start. In
20376 // that case we'll create an empty content token and return an
20377 // uninterpolated string.
20378 pm_token_t content = (pm_token_t) {
20380 .start = parser->previous.end,
20381 .end = parser->previous.end
20382 };
20383
20384 parser_lex(parser);
20385 return (pm_node_t *) pm_xstring_node_create(parser, &opening, &content, &parser->previous);
20386 }
20387
20389
20390 if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
20391 // In this case we've hit string content so we know the string
20392 // at least has something in it. We'll need to check if the
20393 // following token is the end (in which case we can return a
20394 // plain string) or if it's not then it has interpolation.
20395 pm_string_t unescaped = parser->current_string;
20396 pm_token_t content = parser->current;
20397 parser_lex(parser);
20398
20399 if (match1(parser, PM_TOKEN_STRING_END)) {
20400 pm_node_t *node = (pm_node_t *) pm_xstring_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped);
20401 pm_node_flag_set(node, parse_unescaped_encoding(parser));
20402 parser_lex(parser);
20403 return node;
20404 }
20405
20406 // If we get here, then we have interpolation so we'll need to
20407 // create a string node with interpolation.
20408 node = pm_interpolated_xstring_node_create(parser, &opening, &opening);
20409
20410 pm_token_t opening = not_provided(parser);
20411 pm_token_t closing = not_provided(parser);
20412
20413 pm_node_t *part = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &parser->previous, &closing, &unescaped);
20414 pm_node_flag_set(part, parse_unescaped_encoding(parser));
20415
20416 pm_interpolated_xstring_node_append(node, part);
20417 } else {
20418 // If the first part of the body of the string is not a string
20419 // content, then we have interpolation and we need to create an
20420 // interpolated string node.
20421 node = pm_interpolated_xstring_node_create(parser, &opening, &opening);
20422 }
20423
20424 pm_node_t *part;
20425 while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
20426 if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) {
20427 pm_interpolated_xstring_node_append(node, part);
20428 }
20429 }
20430
20431 pm_token_t closing = parser->current;
20432 if (match1(parser, PM_TOKEN_EOF)) {
20433 pm_parser_err_token(parser, &opening, PM_ERR_XSTRING_TERM);
20434 closing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
20435 } else {
20436 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_XSTRING_TERM);
20437 }
20438 pm_interpolated_xstring_node_closing_set(node, &closing);
20439
20440 return (pm_node_t *) node;
20441 }
20442 case PM_TOKEN_USTAR: {
20443 parser_lex(parser);
20444
20445 // * operators at the beginning of expressions are only valid in the
20446 // context of a multiple assignment. We enforce that here. We'll
20447 // still lex past it though and create a missing node place.
20448 if (binding_power != PM_BINDING_POWER_STATEMENT) {
20449 pm_parser_err_prefix(parser, diag_id);
20450 return (pm_node_t *) pm_missing_node_create(parser, parser->previous.start, parser->previous.end);
20451 }
20452
20453 pm_token_t operator = parser->previous;
20454 pm_node_t *name = NULL;
20455
20456 if (token_begins_expression_p(parser->current.type)) {
20457 name = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
20458 }
20459
20460 pm_node_t *splat = (pm_node_t *) pm_splat_node_create(parser, &operator, name);
20461
20462 if (match1(parser, PM_TOKEN_COMMA)) {
20463 return parse_targets_validate(parser, splat, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
20464 } else {
20465 return parse_target_validate(parser, splat, true);
20466 }
20467 }
20468 case PM_TOKEN_BANG: {
20469 if (binding_power > PM_BINDING_POWER_UNARY) {
20470 pm_parser_err_prefix(parser, PM_ERR_UNARY_DISALLOWED);
20471 }
20472
20473 parser_lex(parser);
20474
20475 pm_token_t operator = parser->previous;
20476 pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, binding_power < PM_BINDING_POWER_MATCH, false, PM_ERR_UNARY_RECEIVER, (uint16_t) (depth + 1));
20477 pm_call_node_t *node = pm_call_node_unary_create(parser, &operator, receiver, "!");
20478
20479 pm_conditional_predicate(parser, receiver, PM_CONDITIONAL_PREDICATE_TYPE_NOT);
20480 return (pm_node_t *) node;
20481 }
20482 case PM_TOKEN_TILDE: {
20483 if (binding_power > PM_BINDING_POWER_UNARY) {
20484 pm_parser_err_prefix(parser, PM_ERR_UNARY_DISALLOWED);
20485 }
20486 parser_lex(parser);
20487
20488 pm_token_t operator = parser->previous;
20489 pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false, false, PM_ERR_UNARY_RECEIVER, (uint16_t) (depth + 1));
20490 pm_call_node_t *node = pm_call_node_unary_create(parser, &operator, receiver, "~");
20491
20492 return (pm_node_t *) node;
20493 }
20494 case PM_TOKEN_UMINUS: {
20495 if (binding_power > PM_BINDING_POWER_UNARY) {
20496 pm_parser_err_prefix(parser, PM_ERR_UNARY_DISALLOWED);
20497 }
20498 parser_lex(parser);
20499
20500 pm_token_t operator = parser->previous;
20501 pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false, false, PM_ERR_UNARY_RECEIVER, (uint16_t) (depth + 1));
20502 pm_call_node_t *node = pm_call_node_unary_create(parser, &operator, receiver, "-@");
20503
20504 return (pm_node_t *) node;
20505 }
20506 case PM_TOKEN_UMINUS_NUM: {
20507 parser_lex(parser);
20508
20509 pm_token_t operator = parser->previous;
20510 pm_node_t *node = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false, false, PM_ERR_UNARY_RECEIVER, (uint16_t) (depth + 1));
20511
20512 if (accept1(parser, PM_TOKEN_STAR_STAR)) {
20513 pm_token_t exponent_operator = parser->previous;
20514 pm_node_t *exponent = parse_expression(parser, pm_binding_powers[exponent_operator.type].right, false, false, PM_ERR_EXPECT_ARGUMENT, (uint16_t) (depth + 1));
20515 node = (pm_node_t *) pm_call_node_binary_create(parser, node, &exponent_operator, exponent, 0);
20516 node = (pm_node_t *) pm_call_node_unary_create(parser, &operator, node, "-@");
20517 } else {
20518 switch (PM_NODE_TYPE(node)) {
20519 case PM_INTEGER_NODE:
20520 case PM_FLOAT_NODE:
20521 case PM_RATIONAL_NODE:
20522 case PM_IMAGINARY_NODE:
20523 parse_negative_numeric(node);
20524 break;
20525 default:
20526 node = (pm_node_t *) pm_call_node_unary_create(parser, &operator, node, "-@");
20527 break;
20528 }
20529 }
20530
20531 return node;
20532 }
20534 int previous_lambda_enclosure_nesting = parser->lambda_enclosure_nesting;
20536
20537 size_t opening_newline_index = token_newline_index(parser);
20538 pm_accepts_block_stack_push(parser, true);
20539 parser_lex(parser);
20540
20541 pm_token_t operator = parser->previous;
20542 pm_parser_scope_push(parser, false);
20543
20544 pm_block_parameters_node_t *block_parameters;
20545
20546 switch (parser->current.type) {
20548 pm_token_t opening = parser->current;
20549 parser_lex(parser);
20550
20551 if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
20552 block_parameters = pm_block_parameters_node_create(parser, NULL, &opening);
20553 } else {
20554 block_parameters = parse_block_parameters(parser, false, &opening, true, true, (uint16_t) (depth + 1));
20555 }
20556
20557 accept1(parser, PM_TOKEN_NEWLINE);
20558 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
20559
20560 pm_block_parameters_node_closing_set(block_parameters, &parser->previous);
20561 break;
20562 }
20563 case PM_CASE_PARAMETER: {
20564 pm_accepts_block_stack_push(parser, false);
20565 pm_token_t opening = not_provided(parser);
20566 block_parameters = parse_block_parameters(parser, false, &opening, true, false, (uint16_t) (depth + 1));
20567 pm_accepts_block_stack_pop(parser);
20568 break;
20569 }
20570 default: {
20571 block_parameters = NULL;
20572 break;
20573 }
20574 }
20575
20576 pm_token_t opening;
20577 pm_node_t *body = NULL;
20578 parser->lambda_enclosure_nesting = previous_lambda_enclosure_nesting;
20579
20580 if (accept1(parser, PM_TOKEN_LAMBDA_BEGIN)) {
20581 opening = parser->previous;
20582
20583 if (!match1(parser, PM_TOKEN_BRACE_RIGHT)) {
20584 body = (pm_node_t *) parse_statements(parser, PM_CONTEXT_LAMBDA_BRACES, (uint16_t) (depth + 1));
20585 }
20586
20587 parser_warn_indentation_mismatch(parser, opening_newline_index, &operator, false, false);
20588 expect1(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_LAMBDA_TERM_BRACE);
20589 } else {
20590 expect1(parser, PM_TOKEN_KEYWORD_DO, PM_ERR_LAMBDA_OPEN);
20591 opening = parser->previous;
20592
20594 pm_accepts_block_stack_push(parser, true);
20595 body = (pm_node_t *) parse_statements(parser, PM_CONTEXT_LAMBDA_DO_END, (uint16_t) (depth + 1));
20596 pm_accepts_block_stack_pop(parser);
20597 }
20598
20599 if (match2(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
20600 assert(body == NULL || PM_NODE_TYPE_P(body, PM_STATEMENTS_NODE));
20601 body = (pm_node_t *) parse_rescues_implicit_begin(parser, opening_newline_index, &operator, opening.start, (pm_statements_node_t *) body, PM_RESCUES_LAMBDA, (uint16_t) (depth + 1));
20602 } else {
20603 parser_warn_indentation_mismatch(parser, opening_newline_index, &operator, false, false);
20604 }
20605
20606 expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_LAMBDA_TERM_END);
20607 }
20608
20609 pm_constant_id_list_t locals;
20610 pm_locals_order(parser, &parser->current_scope->locals, &locals, pm_parser_scope_toplevel_p(parser));
20611 pm_node_t *parameters = parse_blocklike_parameters(parser, (pm_node_t *) block_parameters, &operator, &parser->previous);
20612
20613 pm_parser_scope_pop(parser);
20614 pm_accepts_block_stack_pop(parser);
20615
20616 return (pm_node_t *) pm_lambda_node_create(parser, &locals, &operator, &opening, &parser->previous, parameters, body);
20617 }
20618 case PM_TOKEN_UPLUS: {
20619 if (binding_power > PM_BINDING_POWER_UNARY) {
20620 pm_parser_err_prefix(parser, PM_ERR_UNARY_DISALLOWED);
20621 }
20622 parser_lex(parser);
20623
20624 pm_token_t operator = parser->previous;
20625 pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false, false, PM_ERR_UNARY_RECEIVER, (uint16_t) (depth + 1));
20626 pm_call_node_t *node = pm_call_node_unary_create(parser, &operator, receiver, "+@");
20627
20628 return (pm_node_t *) node;
20629 }
20631 return parse_strings(parser, NULL, accepts_label, (uint16_t) (depth + 1));
20632 case PM_TOKEN_SYMBOL_BEGIN: {
20633 pm_lex_mode_t lex_mode = *parser->lex_modes.current;
20634 parser_lex(parser);
20635
20636 return parse_symbol(parser, &lex_mode, PM_LEX_STATE_END, (uint16_t) (depth + 1));
20637 }
20638 default: {
20639 pm_context_t recoverable = context_recoverable(parser, &parser->current);
20640
20641 if (recoverable != PM_CONTEXT_NONE) {
20642 parser->recovering = true;
20643
20644 // If the given error is not the generic one, then we'll add it
20645 // here because it will provide more context in addition to the
20646 // recoverable error that we will also add.
20647 if (diag_id != PM_ERR_CANNOT_PARSE_EXPRESSION) {
20648 pm_parser_err_prefix(parser, diag_id);
20649 }
20650
20651 // If we get here, then we are assuming this token is closing a
20652 // parent context, so we'll indicate that to the user so that
20653 // they know how we behaved.
20654 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_CLOSE_CONTEXT, pm_token_type_human(parser->current.type), context_human(recoverable));
20655 } else if (diag_id == PM_ERR_CANNOT_PARSE_EXPRESSION) {
20656 // We're going to make a special case here, because "cannot
20657 // parse expression" is pretty generic, and we know here that we
20658 // have an unexpected token.
20659 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, pm_token_type_human(parser->current.type));
20660 } else {
20661 pm_parser_err_prefix(parser, diag_id);
20662 }
20663
20664 return (pm_node_t *) pm_missing_node_create(parser, parser->previous.start, parser->previous.end);
20665 }
20666 }
20667}
20668
20678static pm_node_t *
20679parse_assignment_value(pm_parser_t *parser, pm_binding_power_t previous_binding_power, pm_binding_power_t binding_power, bool accepts_command_call, pm_diagnostic_id_t diag_id, uint16_t depth) {
20680 pm_node_t *value = parse_value_expression(parser, binding_power, previous_binding_power == PM_BINDING_POWER_ASSIGNMENT ? accepts_command_call : previous_binding_power < PM_BINDING_POWER_MATCH, false, diag_id, (uint16_t) (depth + 1));
20681
20682 // Contradicting binding powers, the right-hand-side value of the assignment
20683 // allows the `rescue` modifier.
20684 if (match1(parser, PM_TOKEN_KEYWORD_RESCUE_MODIFIER)) {
20685 context_push(parser, PM_CONTEXT_RESCUE_MODIFIER);
20686
20687 pm_token_t rescue = parser->current;
20688 parser_lex(parser);
20689
20690 pm_node_t *right = parse_expression(parser, binding_power, false, false, PM_ERR_RESCUE_MODIFIER_VALUE, (uint16_t) (depth + 1));
20691 context_pop(parser);
20692
20693 return (pm_node_t *) pm_rescue_modifier_node_create(parser, value, &rescue, right);
20694 }
20695
20696 return value;
20697}
20698
20703static void
20704parse_assignment_value_local(pm_parser_t *parser, const pm_node_t *node) {
20705 switch (PM_NODE_TYPE(node)) {
20706 case PM_BEGIN_NODE: {
20707 const pm_begin_node_t *cast = (const pm_begin_node_t *) node;
20708 if (cast->statements != NULL) parse_assignment_value_local(parser, (const pm_node_t *) cast->statements);
20709 break;
20710 }
20713 pm_locals_read(&pm_parser_scope_find(parser, cast->depth)->locals, cast->name);
20714 break;
20715 }
20716 case PM_PARENTHESES_NODE: {
20717 const pm_parentheses_node_t *cast = (const pm_parentheses_node_t *) node;
20718 if (cast->body != NULL) parse_assignment_value_local(parser, cast->body);
20719 break;
20720 }
20721 case PM_STATEMENTS_NODE: {
20722 const pm_statements_node_t *cast = (const pm_statements_node_t *) node;
20723 const pm_node_t *statement;
20724
20725 PM_NODE_LIST_FOREACH(&cast->body, index, statement) {
20726 parse_assignment_value_local(parser, statement);
20727 }
20728 break;
20729 }
20730 default:
20731 break;
20732 }
20733}
20734
20747static pm_node_t *
20748parse_assignment_values(pm_parser_t *parser, pm_binding_power_t previous_binding_power, pm_binding_power_t binding_power, bool accepts_command_call, pm_diagnostic_id_t diag_id, uint16_t depth) {
20749 bool permitted = true;
20750 if (previous_binding_power != PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_USTAR)) permitted = false;
20751
20752 pm_node_t *value = parse_starred_expression(parser, binding_power, previous_binding_power == PM_BINDING_POWER_ASSIGNMENT ? accepts_command_call : previous_binding_power < PM_BINDING_POWER_MATCH, diag_id, (uint16_t) (depth + 1));
20753 if (!permitted) pm_parser_err_node(parser, value, PM_ERR_UNEXPECTED_MULTI_WRITE);
20754
20755 parse_assignment_value_local(parser, value);
20756 bool single_value = true;
20757
20758 if (previous_binding_power == PM_BINDING_POWER_STATEMENT && (PM_NODE_TYPE_P(value, PM_SPLAT_NODE) || match1(parser, PM_TOKEN_COMMA))) {
20759 single_value = false;
20760
20761 pm_token_t opening = not_provided(parser);
20762 pm_array_node_t *array = pm_array_node_create(parser, &opening);
20763
20764 pm_array_node_elements_append(array, value);
20765 value = (pm_node_t *) array;
20766
20767 while (accept1(parser, PM_TOKEN_COMMA)) {
20768 pm_node_t *element = parse_starred_expression(parser, binding_power, false, PM_ERR_ARRAY_ELEMENT, (uint16_t) (depth + 1));
20769
20770 pm_array_node_elements_append(array, element);
20771 if (PM_NODE_TYPE_P(element, PM_MISSING_NODE)) break;
20772
20773 parse_assignment_value_local(parser, element);
20774 }
20775 }
20776
20777 // Contradicting binding powers, the right-hand-side value of the assignment
20778 // allows the `rescue` modifier.
20779 if ((single_value || (binding_power == (PM_BINDING_POWER_MULTI_ASSIGNMENT + 1))) && match1(parser, PM_TOKEN_KEYWORD_RESCUE_MODIFIER)) {
20780 context_push(parser, PM_CONTEXT_RESCUE_MODIFIER);
20781
20782 pm_token_t rescue = parser->current;
20783 parser_lex(parser);
20784
20785 bool accepts_command_call_inner = false;
20786
20787 // RHS can accept command call iff the value is a call with arguments
20788 // but without parenthesis.
20789 if (PM_NODE_TYPE_P(value, PM_CALL_NODE)) {
20790 pm_call_node_t *call_node = (pm_call_node_t *) value;
20791 if ((call_node->arguments != NULL) && (call_node->opening_loc.start == NULL)) {
20792 accepts_command_call_inner = true;
20793 }
20794 }
20795
20796 pm_node_t *right = parse_expression(parser, binding_power, accepts_command_call_inner, false, PM_ERR_RESCUE_MODIFIER_VALUE, (uint16_t) (depth + 1));
20797 context_pop(parser);
20798
20799 return (pm_node_t *) pm_rescue_modifier_node_create(parser, value, &rescue, right);
20800 }
20801
20802 return value;
20803}
20804
20812static void
20813parse_call_operator_write(pm_parser_t *parser, pm_call_node_t *call_node, const pm_token_t *operator) {
20814 if (call_node->arguments != NULL) {
20815 pm_parser_err_token(parser, operator, PM_ERR_OPERATOR_WRITE_ARGUMENTS);
20816 pm_node_destroy(parser, (pm_node_t *) call_node->arguments);
20817 call_node->arguments = NULL;
20818 }
20819
20820 if (call_node->block != NULL) {
20821 pm_parser_err_token(parser, operator, PM_ERR_OPERATOR_WRITE_BLOCK);
20822 pm_node_destroy(parser, (pm_node_t *) call_node->block);
20823 call_node->block = NULL;
20824 }
20825}
20826
20851
20856static void
20857parse_regular_expression_named_capture(const pm_string_t *capture, void *data) {
20859
20860 pm_parser_t *parser = callback_data->parser;
20861 pm_call_node_t *call = callback_data->call;
20862 pm_constant_id_list_t *names = &callback_data->names;
20863
20864 const uint8_t *source = pm_string_source(capture);
20865 size_t length = pm_string_length(capture);
20866
20867 pm_location_t location;
20868 pm_constant_id_t name;
20869
20870 // If the name of the capture group isn't a valid identifier, we do
20871 // not add it to the local table.
20872 if (!pm_slice_is_valid_local(parser, source, source + length)) return;
20873
20874 if (callback_data->shared) {
20875 // If the unescaped string is a slice of the source, then we can
20876 // copy the names directly. The pointers will line up.
20877 location = (pm_location_t) { .start = source, .end = source + length };
20878 name = pm_parser_constant_id_location(parser, location.start, location.end);
20879 } else {
20880 // Otherwise, the name is a slice of the malloc-ed owned string,
20881 // in which case we need to copy it out into a new string.
20882 location = (pm_location_t) { .start = call->receiver->location.start, .end = call->receiver->location.end };
20883
20884 void *memory = xmalloc(length);
20885 if (memory == NULL) abort();
20886
20887 memcpy(memory, source, length);
20888 name = pm_parser_constant_id_owned(parser, (uint8_t *) memory, length);
20889 }
20890
20891 // Add this name to the list of constants if it is valid, not duplicated,
20892 // and not a keyword.
20893 if (name != 0 && !pm_constant_id_list_includes(names, name)) {
20894 pm_constant_id_list_append(names, name);
20895
20896 int depth;
20897 if ((depth = pm_parser_local_depth_constant_id(parser, name)) == -1) {
20898 // If the local is not already a local but it is a keyword, then we
20899 // do not want to add a capture for this.
20900 if (pm_local_is_keyword((const char *) source, length)) return;
20901
20902 // If the identifier is not already a local, then we will add it to
20903 // the local table.
20904 pm_parser_local_add(parser, name, location.start, location.end, 0);
20905 }
20906
20907 // Here we lazily create the MatchWriteNode since we know we're
20908 // about to add a target.
20909 if (callback_data->match == NULL) {
20910 callback_data->match = pm_match_write_node_create(parser, call);
20911 }
20912
20913 // Next, create the local variable target and add it to the list of
20914 // targets for the match.
20915 pm_node_t *target = (pm_node_t *) pm_local_variable_target_node_create(parser, &location, name, depth == -1 ? 0 : (uint32_t) depth);
20916 pm_node_list_append(&callback_data->match->targets, target);
20917 }
20918}
20919
20924static pm_node_t *
20925parse_regular_expression_named_captures(pm_parser_t *parser, const pm_string_t *content, pm_call_node_t *call, bool extended_mode) {
20927 .parser = parser,
20928 .call = call,
20929 .names = { 0 },
20930 .shared = content->type == PM_STRING_SHARED
20931 };
20932
20934 .parser = parser,
20935 .start = call->receiver->location.start,
20936 .end = call->receiver->location.end,
20937 .shared = content->type == PM_STRING_SHARED
20938 };
20939
20940 pm_regexp_parse(parser, pm_string_source(content), pm_string_length(content), extended_mode, parse_regular_expression_named_capture, &callback_data, parse_regular_expression_error, &error_data);
20941 pm_constant_id_list_free(&callback_data.names);
20942
20943 if (callback_data.match != NULL) {
20944 return (pm_node_t *) callback_data.match;
20945 } else {
20946 return (pm_node_t *) call;
20947 }
20948}
20949
20950static inline pm_node_t *
20951parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t previous_binding_power, pm_binding_power_t binding_power, bool accepts_command_call, uint16_t depth) {
20952 pm_token_t token = parser->current;
20953
20954 switch (token.type) {
20955 case PM_TOKEN_EQUAL: {
20956 switch (PM_NODE_TYPE(node)) {
20957 case PM_CALL_NODE: {
20958 // If we have no arguments to the call node and we need this
20959 // to be a target then this is either a method call or a
20960 // local variable write. This _must_ happen before the value
20961 // is parsed because it could be referenced in the value.
20962 pm_call_node_t *call_node = (pm_call_node_t *) node;
20964 pm_parser_local_add_location(parser, call_node->message_loc.start, call_node->message_loc.end, 0);
20965 }
20966 }
20967 /* fallthrough */
20968 case PM_CASE_WRITABLE: {
20969 parser_lex(parser);
20970 pm_node_t *value = parse_assignment_values(parser, previous_binding_power, PM_NODE_TYPE_P(node, PM_MULTI_TARGET_NODE) ? PM_BINDING_POWER_MULTI_ASSIGNMENT + 1 : binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_EQUAL, (uint16_t) (depth + 1));
20971
20972 if (PM_NODE_TYPE_P(node, PM_MULTI_TARGET_NODE) && previous_binding_power != PM_BINDING_POWER_STATEMENT) {
20973 pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_MULTI_WRITE);
20974 }
20975
20976 return parse_write(parser, node, &token, value);
20977 }
20978 case PM_SPLAT_NODE: {
20979 pm_multi_target_node_t *multi_target = pm_multi_target_node_create(parser);
20980 pm_multi_target_node_targets_append(parser, multi_target, node);
20981
20982 parser_lex(parser);
20983 pm_node_t *value = parse_assignment_values(parser, previous_binding_power, PM_BINDING_POWER_MULTI_ASSIGNMENT + 1, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_EQUAL, (uint16_t) (depth + 1));
20984 return parse_write(parser, (pm_node_t *) multi_target, &token, value);
20985 }
20987 case PM_FALSE_NODE:
20990 case PM_NIL_NODE:
20991 case PM_SELF_NODE:
20992 case PM_TRUE_NODE: {
20993 // In these special cases, we have specific error messages
20994 // and we will replace them with local variable writes.
20995 parser_lex(parser);
20996 pm_node_t *value = parse_assignment_values(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_EQUAL, (uint16_t) (depth + 1));
20997 return parse_unwriteable_write(parser, node, &token, value);
20998 }
20999 default:
21000 // In this case we have an = sign, but we don't know what
21001 // it's for. We need to treat it as an error. We'll mark it
21002 // as an error and skip past it.
21003 parser_lex(parser);
21004 pm_parser_err_token(parser, &token, PM_ERR_EXPRESSION_NOT_WRITABLE);
21005 return node;
21006 }
21007 }
21009 switch (PM_NODE_TYPE(node)) {
21012 PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, node, PM_ERR_WRITE_TARGET_READONLY);
21013 /* fallthrough */
21015 parser_lex(parser);
21016
21017 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
21018 pm_node_t *result = (pm_node_t *) pm_global_variable_and_write_node_create(parser, node, &token, value);
21019
21020 pm_node_destroy(parser, node);
21021 return result;
21022 }
21024 parser_lex(parser);
21025
21026 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
21027 pm_node_t *result = (pm_node_t *) pm_class_variable_and_write_node_create(parser, (pm_class_variable_read_node_t *) node, &token, value);
21028
21029 pm_node_destroy(parser, node);
21030 return result;
21031 }
21032 case PM_CONSTANT_PATH_NODE: {
21033 parser_lex(parser);
21034
21035 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
21036 pm_node_t *write = (pm_node_t *) pm_constant_path_and_write_node_create(parser, (pm_constant_path_node_t *) node, &token, value);
21037
21038 return parse_shareable_constant_write(parser, write);
21039 }
21040 case PM_CONSTANT_READ_NODE: {
21041 parser_lex(parser);
21042
21043 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
21044 pm_node_t *write = (pm_node_t *) pm_constant_and_write_node_create(parser, (pm_constant_read_node_t *) node, &token, value);
21045
21046 pm_node_destroy(parser, node);
21047 return parse_shareable_constant_write(parser, write);
21048 }
21050 parser_lex(parser);
21051
21052 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
21053 pm_node_t *result = (pm_node_t *) pm_instance_variable_and_write_node_create(parser, (pm_instance_variable_read_node_t *) node, &token, value);
21054
21055 pm_node_destroy(parser, node);
21056 return result;
21057 }
21060 parser_lex(parser);
21061
21062 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
21063 pm_node_t *result = (pm_node_t *) pm_local_variable_and_write_node_create(parser, node, &token, value, cast->name, cast->depth);
21064
21065 pm_node_destroy(parser, node);
21066 return result;
21067 }
21068 case PM_CALL_NODE: {
21069 pm_call_node_t *cast = (pm_call_node_t *) node;
21070
21071 // If we have a vcall (a method with no arguments and no
21072 // receiver that could have been a local variable) then we
21073 // will transform it into a local variable write.
21075 pm_location_t *message_loc = &cast->message_loc;
21076 pm_refute_numbered_parameter(parser, message_loc->start, message_loc->end);
21077
21078 pm_constant_id_t constant_id = pm_parser_local_add_location(parser, message_loc->start, message_loc->end, 1);
21079 parser_lex(parser);
21080
21081 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
21082 pm_node_t *result = (pm_node_t *) pm_local_variable_and_write_node_create(parser, (pm_node_t *) cast, &token, value, constant_id, 0);
21083
21084 pm_node_destroy(parser, (pm_node_t *) cast);
21085 return result;
21086 }
21087
21088 // Move past the token here so that we have already added
21089 // the local variable by this point.
21090 parser_lex(parser);
21091
21092 // If there is no call operator and the message is "[]" then
21093 // this is an aref expression, and we can transform it into
21094 // an aset expression.
21095 if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_INDEX)) {
21096 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
21097 return (pm_node_t *) pm_index_and_write_node_create(parser, cast, &token, value);
21098 }
21099
21100 // If this node cannot be writable, then we have an error.
21101 if (pm_call_node_writable_p(parser, cast)) {
21102 parse_write_name(parser, &cast->name);
21103 } else {
21104 pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_UNEXPECTED);
21105 }
21106
21107 parse_call_operator_write(parser, cast, &token);
21108 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
21109 return (pm_node_t *) pm_call_and_write_node_create(parser, cast, &token, value);
21110 }
21111 case PM_MULTI_WRITE_NODE: {
21112 parser_lex(parser);
21113 pm_parser_err_token(parser, &token, PM_ERR_AMPAMPEQ_MULTI_ASSIGN);
21114 return node;
21115 }
21116 default:
21117 parser_lex(parser);
21118
21119 // In this case we have an &&= sign, but we don't know what it's for.
21120 // We need to treat it as an error. For now, we'll mark it as an error
21121 // and just skip right past it.
21122 pm_parser_err_token(parser, &token, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ);
21123 return node;
21124 }
21125 }
21127 switch (PM_NODE_TYPE(node)) {
21130 PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, node, PM_ERR_WRITE_TARGET_READONLY);
21131 /* fallthrough */
21133 parser_lex(parser);
21134
21135 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
21136 pm_node_t *result = (pm_node_t *) pm_global_variable_or_write_node_create(parser, node, &token, value);
21137
21138 pm_node_destroy(parser, node);
21139 return result;
21140 }
21142 parser_lex(parser);
21143
21144 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
21145 pm_node_t *result = (pm_node_t *) pm_class_variable_or_write_node_create(parser, (pm_class_variable_read_node_t *) node, &token, value);
21146
21147 pm_node_destroy(parser, node);
21148 return result;
21149 }
21150 case PM_CONSTANT_PATH_NODE: {
21151 parser_lex(parser);
21152
21153 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
21154 pm_node_t *write = (pm_node_t *) pm_constant_path_or_write_node_create(parser, (pm_constant_path_node_t *) node, &token, value);
21155
21156 return parse_shareable_constant_write(parser, write);
21157 }
21158 case PM_CONSTANT_READ_NODE: {
21159 parser_lex(parser);
21160
21161 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
21162 pm_node_t *write = (pm_node_t *) pm_constant_or_write_node_create(parser, (pm_constant_read_node_t *) node, &token, value);
21163
21164 pm_node_destroy(parser, node);
21165 return parse_shareable_constant_write(parser, write);
21166 }
21168 parser_lex(parser);
21169
21170 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
21171 pm_node_t *result = (pm_node_t *) pm_instance_variable_or_write_node_create(parser, (pm_instance_variable_read_node_t *) node, &token, value);
21172
21173 pm_node_destroy(parser, node);
21174 return result;
21175 }
21178 parser_lex(parser);
21179
21180 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
21181 pm_node_t *result = (pm_node_t *) pm_local_variable_or_write_node_create(parser, node, &token, value, cast->name, cast->depth);
21182
21183 pm_node_destroy(parser, node);
21184 return result;
21185 }
21186 case PM_CALL_NODE: {
21187 pm_call_node_t *cast = (pm_call_node_t *) node;
21188
21189 // If we have a vcall (a method with no arguments and no
21190 // receiver that could have been a local variable) then we
21191 // will transform it into a local variable write.
21193 pm_location_t *message_loc = &cast->message_loc;
21194 pm_refute_numbered_parameter(parser, message_loc->start, message_loc->end);
21195
21196 pm_constant_id_t constant_id = pm_parser_local_add_location(parser, message_loc->start, message_loc->end, 1);
21197 parser_lex(parser);
21198
21199 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
21200 pm_node_t *result = (pm_node_t *) pm_local_variable_or_write_node_create(parser, (pm_node_t *) cast, &token, value, constant_id, 0);
21201
21202 pm_node_destroy(parser, (pm_node_t *) cast);
21203 return result;
21204 }
21205
21206 // Move past the token here so that we have already added
21207 // the local variable by this point.
21208 parser_lex(parser);
21209
21210 // If there is no call operator and the message is "[]" then
21211 // this is an aref expression, and we can transform it into
21212 // an aset expression.
21213 if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_INDEX)) {
21214 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
21215 return (pm_node_t *) pm_index_or_write_node_create(parser, cast, &token, value);
21216 }
21217
21218 // If this node cannot be writable, then we have an error.
21219 if (pm_call_node_writable_p(parser, cast)) {
21220 parse_write_name(parser, &cast->name);
21221 } else {
21222 pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_UNEXPECTED);
21223 }
21224
21225 parse_call_operator_write(parser, cast, &token);
21226 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
21227 return (pm_node_t *) pm_call_or_write_node_create(parser, cast, &token, value);
21228 }
21229 case PM_MULTI_WRITE_NODE: {
21230 parser_lex(parser);
21231 pm_parser_err_token(parser, &token, PM_ERR_PIPEPIPEEQ_MULTI_ASSIGN);
21232 return node;
21233 }
21234 default:
21235 parser_lex(parser);
21236
21237 // In this case we have an ||= sign, but we don't know what it's for.
21238 // We need to treat it as an error. For now, we'll mark it as an error
21239 // and just skip right past it.
21240 pm_parser_err_token(parser, &token, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ);
21241 return node;
21242 }
21243 }
21255 switch (PM_NODE_TYPE(node)) {
21258 PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, node, PM_ERR_WRITE_TARGET_READONLY);
21259 /* fallthrough */
21261 parser_lex(parser);
21262
21263 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21264 pm_node_t *result = (pm_node_t *) pm_global_variable_operator_write_node_create(parser, node, &token, value);
21265
21266 pm_node_destroy(parser, node);
21267 return result;
21268 }
21270 parser_lex(parser);
21271
21272 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21273 pm_node_t *result = (pm_node_t *) pm_class_variable_operator_write_node_create(parser, (pm_class_variable_read_node_t *) node, &token, value);
21274
21275 pm_node_destroy(parser, node);
21276 return result;
21277 }
21278 case PM_CONSTANT_PATH_NODE: {
21279 parser_lex(parser);
21280
21281 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21282 pm_node_t *write = (pm_node_t *) pm_constant_path_operator_write_node_create(parser, (pm_constant_path_node_t *) node, &token, value);
21283
21284 return parse_shareable_constant_write(parser, write);
21285 }
21286 case PM_CONSTANT_READ_NODE: {
21287 parser_lex(parser);
21288
21289 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21290 pm_node_t *write = (pm_node_t *) pm_constant_operator_write_node_create(parser, (pm_constant_read_node_t *) node, &token, value);
21291
21292 pm_node_destroy(parser, node);
21293 return parse_shareable_constant_write(parser, write);
21294 }
21296 parser_lex(parser);
21297
21298 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21299 pm_node_t *result = (pm_node_t *) pm_instance_variable_operator_write_node_create(parser, (pm_instance_variable_read_node_t *) node, &token, value);
21300
21301 pm_node_destroy(parser, node);
21302 return result;
21303 }
21306 parser_lex(parser);
21307
21308 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21309 pm_node_t *result = (pm_node_t *) pm_local_variable_operator_write_node_create(parser, node, &token, value, cast->name, cast->depth);
21310
21311 pm_node_destroy(parser, node);
21312 return result;
21313 }
21314 case PM_CALL_NODE: {
21315 parser_lex(parser);
21316 pm_call_node_t *cast = (pm_call_node_t *) node;
21317
21318 // If we have a vcall (a method with no arguments and no
21319 // receiver that could have been a local variable) then we
21320 // will transform it into a local variable write.
21322 pm_location_t *message_loc = &cast->message_loc;
21323 pm_refute_numbered_parameter(parser, message_loc->start, message_loc->end);
21324
21325 pm_constant_id_t constant_id = pm_parser_local_add_location(parser, message_loc->start, message_loc->end, 1);
21326 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21327 pm_node_t *result = (pm_node_t *) pm_local_variable_operator_write_node_create(parser, (pm_node_t *) cast, &token, value, constant_id, 0);
21328
21329 pm_node_destroy(parser, (pm_node_t *) cast);
21330 return result;
21331 }
21332
21333 // If there is no call operator and the message is "[]" then
21334 // this is an aref expression, and we can transform it into
21335 // an aset expression.
21336 if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_INDEX)) {
21337 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21338 return (pm_node_t *) pm_index_operator_write_node_create(parser, cast, &token, value);
21339 }
21340
21341 // If this node cannot be writable, then we have an error.
21342 if (pm_call_node_writable_p(parser, cast)) {
21343 parse_write_name(parser, &cast->name);
21344 } else {
21345 pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_UNEXPECTED);
21346 }
21347
21348 parse_call_operator_write(parser, cast, &token);
21349 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21350 return (pm_node_t *) pm_call_operator_write_node_create(parser, cast, &token, value);
21351 }
21352 case PM_MULTI_WRITE_NODE: {
21353 parser_lex(parser);
21354 pm_parser_err_token(parser, &token, PM_ERR_OPERATOR_MULTI_ASSIGN);
21355 return node;
21356 }
21357 default:
21358 parser_lex(parser);
21359
21360 // In this case we have an operator but we don't know what it's for.
21361 // We need to treat it as an error. For now, we'll mark it as an error
21362 // and just skip right past it.
21363 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, pm_token_type_human(parser->current.type));
21364 return node;
21365 }
21366 }
21368 case PM_TOKEN_KEYWORD_AND: {
21369 parser_lex(parser);
21370
21371 pm_node_t *right = parse_expression(parser, binding_power, parser->previous.type == PM_TOKEN_KEYWORD_AND, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21372 return (pm_node_t *) pm_and_node_create(parser, node, &token, right);
21373 }
21375 case PM_TOKEN_PIPE_PIPE: {
21376 parser_lex(parser);
21377
21378 pm_node_t *right = parse_expression(parser, binding_power, parser->previous.type == PM_TOKEN_KEYWORD_OR, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21379 return (pm_node_t *) pm_or_node_create(parser, node, &token, right);
21380 }
21381 case PM_TOKEN_EQUAL_TILDE: {
21382 // Note that we _must_ parse the value before adding the local
21383 // variables in order to properly mirror the behavior of Ruby. For
21384 // example,
21385 //
21386 // /(?<foo>bar)/ =~ foo
21387 //
21388 // In this case, `foo` should be a method call and not a local yet.
21389 parser_lex(parser);
21390 pm_node_t *argument = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21391
21392 // By default, we're going to create a call node and then return it.
21393 pm_call_node_t *call = pm_call_node_binary_create(parser, node, &token, argument, 0);
21394 pm_node_t *result = (pm_node_t *) call;
21395
21396 // If the receiver of this =~ is a regular expression node, then we
21397 // need to introduce local variables for it based on its named
21398 // capture groups.
21400 // It's possible to have an interpolated regular expression node
21401 // that only contains strings. This is because it can be split
21402 // up by a heredoc. In this case we need to concat the unescaped
21403 // strings together and then parse them as a regular expression.
21405
21406 bool interpolated = false;
21407 size_t total_length = 0;
21408
21409 pm_node_t *part;
21410 PM_NODE_LIST_FOREACH(parts, index, part) {
21411 if (PM_NODE_TYPE_P(part, PM_STRING_NODE)) {
21412 total_length += pm_string_length(&((pm_string_node_t *) part)->unescaped);
21413 } else {
21414 interpolated = true;
21415 break;
21416 }
21417 }
21418
21419 if (!interpolated && total_length > 0) {
21420 void *memory = xmalloc(total_length);
21421 if (!memory) abort();
21422
21423 uint8_t *cursor = memory;
21424 PM_NODE_LIST_FOREACH(parts, index, part) {
21425 pm_string_t *unescaped = &((pm_string_node_t *) part)->unescaped;
21426 size_t length = pm_string_length(unescaped);
21427
21428 memcpy(cursor, pm_string_source(unescaped), length);
21429 cursor += length;
21430 }
21431
21432 pm_string_t owned;
21433 pm_string_owned_init(&owned, (uint8_t *) memory, total_length);
21434
21435 result = parse_regular_expression_named_captures(parser, &owned, call, PM_NODE_FLAG_P(node, PM_REGULAR_EXPRESSION_FLAGS_EXTENDED));
21436 pm_string_free(&owned);
21437 }
21438 } else if (PM_NODE_TYPE_P(node, PM_REGULAR_EXPRESSION_NODE)) {
21439 // If we have a regular expression node, then we can just parse
21440 // the named captures directly off the unescaped string.
21441 const pm_string_t *content = &((pm_regular_expression_node_t *) node)->unescaped;
21442 result = parse_regular_expression_named_captures(parser, content, call, PM_NODE_FLAG_P(node, PM_REGULAR_EXPRESSION_FLAGS_EXTENDED));
21443 }
21444
21445 return result;
21446 }
21448 case PM_TOKEN_USTAR:
21450 // The only times this will occur are when we are in an error state,
21451 // but we'll put them in here so that errors can propagate.
21457 case PM_TOKEN_CARET:
21458 case PM_TOKEN_PIPE:
21459 case PM_TOKEN_AMPERSAND:
21461 case PM_TOKEN_LESS_LESS:
21462 case PM_TOKEN_MINUS:
21463 case PM_TOKEN_PLUS:
21464 case PM_TOKEN_PERCENT:
21465 case PM_TOKEN_SLASH:
21466 case PM_TOKEN_STAR:
21467 case PM_TOKEN_STAR_STAR: {
21468 parser_lex(parser);
21469 pm_token_t operator = parser->previous;
21470 switch (PM_NODE_TYPE(node)) {
21474 PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
21475 }
21476 break;
21477 }
21478 case PM_AND_NODE: {
21479 pm_and_node_t *cast = (pm_and_node_t *) node;
21481 PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
21482 }
21483 break;
21484 }
21485 case PM_OR_NODE: {
21486 pm_or_node_t *cast = (pm_or_node_t *) node;
21488 PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
21489 }
21490 break;
21491 }
21492 default:
21493 break;
21494 }
21495
21496 pm_node_t *argument = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21497 return (pm_node_t *) pm_call_node_binary_create(parser, node, &token, argument, 0);
21498 }
21499 case PM_TOKEN_GREATER:
21501 case PM_TOKEN_LESS:
21502 case PM_TOKEN_LESS_EQUAL: {
21503 if (PM_NODE_TYPE_P(node, PM_CALL_NODE) && PM_NODE_FLAG_P(node, PM_CALL_NODE_FLAGS_COMPARISON)) {
21504 PM_PARSER_WARN_TOKEN_FORMAT_CONTENT(parser, parser->current, PM_WARN_COMPARISON_AFTER_COMPARISON);
21505 }
21506
21507 parser_lex(parser);
21508 pm_node_t *argument = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21509 return (pm_node_t *) pm_call_node_binary_create(parser, node, &token, argument, PM_CALL_NODE_FLAGS_COMPARISON);
21510 }
21512 case PM_TOKEN_DOT: {
21513 parser_lex(parser);
21514 pm_token_t operator = parser->previous;
21515 pm_arguments_t arguments = { 0 };
21516
21517 // This if statement handles the foo.() syntax.
21518 if (match1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
21519 parse_arguments_list(parser, &arguments, true, false, (uint16_t) (depth + 1));
21520 return (pm_node_t *) pm_call_node_shorthand_create(parser, node, &operator, &arguments);
21521 }
21522
21523 switch (PM_NODE_TYPE(node)) {
21527 PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
21528 }
21529 break;
21530 }
21531 case PM_AND_NODE: {
21532 pm_and_node_t *cast = (pm_and_node_t *) node;
21534 PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
21535 }
21536 break;
21537 }
21538 case PM_OR_NODE: {
21539 pm_or_node_t *cast = (pm_or_node_t *) node;
21541 PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
21542 }
21543 break;
21544 }
21545 default:
21546 break;
21547 }
21548
21549 pm_token_t message;
21550
21551 switch (parser->current.type) {
21552 case PM_CASE_OPERATOR:
21553 case PM_CASE_KEYWORD:
21554 case PM_TOKEN_CONSTANT:
21556 case PM_TOKEN_METHOD_NAME: {
21557 parser_lex(parser);
21558 message = parser->previous;
21559 break;
21560 }
21561 default: {
21562 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_MESSAGE, pm_token_type_human(parser->current.type));
21563 message = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
21564 }
21565 }
21566
21567 parse_arguments_list(parser, &arguments, true, accepts_command_call, (uint16_t) (depth + 1));
21568 pm_call_node_t *call = pm_call_node_call_create(parser, node, &operator, &message, &arguments);
21569
21570 if (
21571 (previous_binding_power == PM_BINDING_POWER_STATEMENT) &&
21572 arguments.arguments == NULL &&
21573 arguments.opening_loc.start == NULL &&
21574 match1(parser, PM_TOKEN_COMMA)
21575 ) {
21576 return parse_targets_validate(parser, (pm_node_t *) call, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
21577 } else {
21578 return (pm_node_t *) call;
21579 }
21580 }
21581 case PM_TOKEN_DOT_DOT:
21582 case PM_TOKEN_DOT_DOT_DOT: {
21583 parser_lex(parser);
21584
21585 pm_node_t *right = NULL;
21586 if (token_begins_expression_p(parser->current.type)) {
21587 right = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21588 }
21589
21590 return (pm_node_t *) pm_range_node_create(parser, node, &token, right);
21591 }
21593 pm_token_t keyword = parser->current;
21594 parser_lex(parser);
21595
21596 pm_node_t *predicate = parse_value_expression(parser, binding_power, true, false, PM_ERR_CONDITIONAL_IF_PREDICATE, (uint16_t) (depth + 1));
21597 return (pm_node_t *) pm_if_node_modifier_create(parser, node, &keyword, predicate);
21598 }
21600 pm_token_t keyword = parser->current;
21601 parser_lex(parser);
21602
21603 pm_node_t *predicate = parse_value_expression(parser, binding_power, true, false, PM_ERR_CONDITIONAL_UNLESS_PREDICATE, (uint16_t) (depth + 1));
21604 return (pm_node_t *) pm_unless_node_modifier_create(parser, node, &keyword, predicate);
21605 }
21607 parser_lex(parser);
21608 pm_statements_node_t *statements = pm_statements_node_create(parser);
21609 pm_statements_node_body_append(parser, statements, node, true);
21610
21611 pm_node_t *predicate = parse_value_expression(parser, binding_power, true, false, PM_ERR_CONDITIONAL_UNTIL_PREDICATE, (uint16_t) (depth + 1));
21612 return (pm_node_t *) pm_until_node_modifier_create(parser, &token, predicate, statements, PM_NODE_TYPE_P(node, PM_BEGIN_NODE) ? PM_LOOP_FLAGS_BEGIN_MODIFIER : 0);
21613 }
21615 parser_lex(parser);
21616 pm_statements_node_t *statements = pm_statements_node_create(parser);
21617 pm_statements_node_body_append(parser, statements, node, true);
21618
21619 pm_node_t *predicate = parse_value_expression(parser, binding_power, true, false, PM_ERR_CONDITIONAL_WHILE_PREDICATE, (uint16_t) (depth + 1));
21620 return (pm_node_t *) pm_while_node_modifier_create(parser, &token, predicate, statements, PM_NODE_TYPE_P(node, PM_BEGIN_NODE) ? PM_LOOP_FLAGS_BEGIN_MODIFIER : 0);
21621 }
21623 context_push(parser, PM_CONTEXT_TERNARY);
21624 pm_node_list_t current_block_exits = { 0 };
21625 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
21626
21627 pm_token_t qmark = parser->current;
21628 parser_lex(parser);
21629
21630 pm_node_t *true_expression = parse_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_TERNARY_EXPRESSION_TRUE, (uint16_t) (depth + 1));
21631
21632 if (parser->recovering) {
21633 // If parsing the true expression of this ternary resulted in a syntax
21634 // error that we can recover from, then we're going to put missing nodes
21635 // and tokens into the remaining places. We want to be sure to do this
21636 // before the `expect` function call to make sure it doesn't
21637 // accidentally move past a ':' token that occurs after the syntax
21638 // error.
21639 pm_token_t colon = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
21640 pm_node_t *false_expression = (pm_node_t *) pm_missing_node_create(parser, colon.start, colon.end);
21641
21642 context_pop(parser);
21643 pop_block_exits(parser, previous_block_exits);
21644 pm_node_list_free(&current_block_exits);
21645
21646 return (pm_node_t *) pm_if_node_ternary_create(parser, node, &qmark, true_expression, &colon, false_expression);
21647 }
21648
21649 accept1(parser, PM_TOKEN_NEWLINE);
21650 expect1(parser, PM_TOKEN_COLON, PM_ERR_TERNARY_COLON);
21651
21652 pm_token_t colon = parser->previous;
21653 pm_node_t *false_expression = parse_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_TERNARY_EXPRESSION_FALSE, (uint16_t) (depth + 1));
21654
21655 context_pop(parser);
21656 pop_block_exits(parser, previous_block_exits);
21657 pm_node_list_free(&current_block_exits);
21658
21659 return (pm_node_t *) pm_if_node_ternary_create(parser, node, &qmark, true_expression, &colon, false_expression);
21660 }
21661 case PM_TOKEN_COLON_COLON: {
21662 parser_lex(parser);
21663 pm_token_t delimiter = parser->previous;
21664
21665 switch (parser->current.type) {
21666 case PM_TOKEN_CONSTANT: {
21667 parser_lex(parser);
21668 pm_node_t *path;
21669
21670 if (
21671 (parser->current.type == PM_TOKEN_PARENTHESIS_LEFT) ||
21672 (accepts_command_call && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR)))
21673 ) {
21674 // If we have a constant immediately following a '::' operator, then
21675 // this can either be a constant path or a method call, depending on
21676 // what follows the constant.
21677 //
21678 // If we have parentheses, then this is a method call. That would
21679 // look like Foo::Bar().
21680 pm_token_t message = parser->previous;
21681 pm_arguments_t arguments = { 0 };
21682
21683 parse_arguments_list(parser, &arguments, true, accepts_command_call, (uint16_t) (depth + 1));
21684 path = (pm_node_t *) pm_call_node_call_create(parser, node, &delimiter, &message, &arguments);
21685 } else {
21686 // Otherwise, this is a constant path. That would look like Foo::Bar.
21687 path = (pm_node_t *) pm_constant_path_node_create(parser, node, &delimiter, &parser->previous);
21688 }
21689
21690 // If this is followed by a comma then it is a multiple assignment.
21691 if (previous_binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
21692 return parse_targets_validate(parser, path, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
21693 }
21694
21695 return path;
21696 }
21697 case PM_CASE_OPERATOR:
21698 case PM_CASE_KEYWORD:
21700 case PM_TOKEN_METHOD_NAME: {
21701 parser_lex(parser);
21702 pm_token_t message = parser->previous;
21703
21704 // If we have an identifier following a '::' operator, then it is for
21705 // sure a method call.
21706 pm_arguments_t arguments = { 0 };
21707 parse_arguments_list(parser, &arguments, true, accepts_command_call, (uint16_t) (depth + 1));
21708 pm_call_node_t *call = pm_call_node_call_create(parser, node, &delimiter, &message, &arguments);
21709
21710 // If this is followed by a comma then it is a multiple assignment.
21711 if (previous_binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
21712 return parse_targets_validate(parser, (pm_node_t *) call, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
21713 }
21714
21715 return (pm_node_t *) call;
21716 }
21718 // If we have a parenthesis following a '::' operator, then it is the
21719 // method call shorthand. That would look like Foo::(bar).
21720 pm_arguments_t arguments = { 0 };
21721 parse_arguments_list(parser, &arguments, true, false, (uint16_t) (depth + 1));
21722
21723 return (pm_node_t *) pm_call_node_shorthand_create(parser, node, &delimiter, &arguments);
21724 }
21725 default: {
21726 expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
21727 return (pm_node_t *) pm_constant_path_node_create(parser, node, &delimiter, &parser->previous);
21728 }
21729 }
21730 }
21732 context_push(parser, PM_CONTEXT_RESCUE_MODIFIER);
21733 parser_lex(parser);
21734 accept1(parser, PM_TOKEN_NEWLINE);
21735
21736 pm_node_t *value = parse_expression(parser, binding_power, true, false, PM_ERR_RESCUE_MODIFIER_VALUE, (uint16_t) (depth + 1));
21737 context_pop(parser);
21738
21739 return (pm_node_t *) pm_rescue_modifier_node_create(parser, node, &token, value);
21740 }
21741 case PM_TOKEN_BRACKET_LEFT: {
21742 parser_lex(parser);
21743
21744 pm_arguments_t arguments = { 0 };
21745 arguments.opening_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
21746
21747 if (!accept1(parser, PM_TOKEN_BRACKET_RIGHT)) {
21748 pm_accepts_block_stack_push(parser, true);
21749 parse_arguments(parser, &arguments, false, PM_TOKEN_BRACKET_RIGHT, (uint16_t) (depth + 1));
21750 pm_accepts_block_stack_pop(parser);
21751 expect1(parser, PM_TOKEN_BRACKET_RIGHT, PM_ERR_EXPECT_RBRACKET);
21752 }
21753
21754 arguments.closing_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
21755
21756 // If we have a comma after the closing bracket then this is a multiple
21757 // assignment and we should parse the targets.
21758 if (previous_binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
21759 pm_call_node_t *aref = pm_call_node_aref_create(parser, node, &arguments);
21760 return parse_targets_validate(parser, (pm_node_t *) aref, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
21761 }
21762
21763 // If we're at the end of the arguments, we can now check if there is a
21764 // block node that starts with a {. If there is, then we can parse it and
21765 // add it to the arguments.
21766 pm_block_node_t *block = NULL;
21767 if (accept1(parser, PM_TOKEN_BRACE_LEFT)) {
21768 block = parse_block(parser, (uint16_t) (depth + 1));
21769 pm_arguments_validate_block(parser, &arguments, block);
21770 } else if (pm_accepts_block_stack_p(parser) && accept1(parser, PM_TOKEN_KEYWORD_DO)) {
21771 block = parse_block(parser, (uint16_t) (depth + 1));
21772 }
21773
21774 if (block != NULL) {
21775 if (arguments.block != NULL) {
21776 pm_parser_err_node(parser, (pm_node_t *) block, PM_ERR_ARGUMENT_AFTER_BLOCK);
21777 if (arguments.arguments == NULL) {
21778 arguments.arguments = pm_arguments_node_create(parser);
21779 }
21780 pm_arguments_node_arguments_append(arguments.arguments, arguments.block);
21781 }
21782
21783 arguments.block = (pm_node_t *) block;
21784 }
21785
21786 return (pm_node_t *) pm_call_node_aref_create(parser, node, &arguments);
21787 }
21788 case PM_TOKEN_KEYWORD_IN: {
21789 bool previous_pattern_matching_newlines = parser->pattern_matching_newlines;
21790 parser->pattern_matching_newlines = true;
21791
21792 pm_token_t operator = parser->current;
21793 parser->command_start = false;
21794 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
21795 parser_lex(parser);
21796
21797 pm_constant_id_list_t captures = { 0 };
21798 pm_node_t *pattern = parse_pattern(parser, &captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_IN, (uint16_t) (depth + 1));
21799
21800 parser->pattern_matching_newlines = previous_pattern_matching_newlines;
21801 pm_constant_id_list_free(&captures);
21802
21803 return (pm_node_t *) pm_match_predicate_node_create(parser, node, pattern, &operator);
21804 }
21806 bool previous_pattern_matching_newlines = parser->pattern_matching_newlines;
21807 parser->pattern_matching_newlines = true;
21808
21809 pm_token_t operator = parser->current;
21810 parser->command_start = false;
21811 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
21812 parser_lex(parser);
21813
21814 pm_constant_id_list_t captures = { 0 };
21815 pm_node_t *pattern = parse_pattern(parser, &captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_HROCKET, (uint16_t) (depth + 1));
21816
21817 parser->pattern_matching_newlines = previous_pattern_matching_newlines;
21818 pm_constant_id_list_free(&captures);
21819
21820 return (pm_node_t *) pm_match_required_node_create(parser, node, pattern, &operator);
21821 }
21822 default:
21823 assert(false && "unreachable");
21824 return NULL;
21825 }
21826}
21827
// Retire the PM_PARSE_PATTERN_* flag macros. Two of them (TOP and MULTI) are
// passed to parse_pattern by the infix parser above; undefining them here keeps
// the names from leaking into the rest of the file.
21828#undef PM_PARSE_PATTERN_SINGLE
21829#undef PM_PARSE_PATTERN_TOP
21830#undef PM_PARSE_PATTERN_MULTI
21831
21836static inline bool
21837pm_call_node_command_p(const pm_call_node_t *node) {
21838 return (
21839 (node->opening_loc.start == NULL) &&
21840 (node->block == NULL || PM_NODE_TYPE_P(node->block, PM_BLOCK_ARGUMENT_NODE)) &&
21841 (node->arguments != NULL || node->block != NULL)
21842 );
21843}
21844
/**
 * Parse one expression: a prefix expression followed by as many infix
 * operators as the given binding power allows.
 *
 * parser               - the parser to read tokens from.
 * binding_power        - an infix operator is only consumed while this value
 *                        is <= the left binding power of the current token.
 * accepts_command_call - forwarded to the prefix and infix parsers; it is
 *                        cleared inside the loop once the parsed node can no
 *                        longer continue a method chain.
 * accepts_label        - forwarded to parse_expression_prefix.
 * diag_id              - forwarded to parse_expression_prefix.
 * depth                - current nesting depth; a depth at or beyond
 *                        PRISM_DEPTH_MAXIMUM records PM_ERR_NESTING_TOO_DEEP
 *                        and yields a missing node instead of recursing.
 *
 * Returns the parsed node (a missing node when the prefix parser produced one
 * or when the depth limit was hit).
 */
21853static pm_node_t *
21854parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, bool accepts_label, pm_diagnostic_id_t diag_id, uint16_t depth) {
// Stop before recursing any deeper; the missing node covers the current token.
21855 if (PRISM_UNLIKELY(depth >= PRISM_DEPTH_MAXIMUM)) {
21856 pm_parser_err_current(parser, PM_ERR_NESTING_TOO_DEEP);
21857 return (pm_node_t *) pm_missing_node_create(parser, parser->current.start, parser->current.end);
21858 }
21859
21860 pm_node_t *node = parse_expression_prefix(parser, binding_power, accepts_command_call, accepts_label, diag_id, depth);
21861
// Some prefix results must be returned right away instead of being extended
// with infix operators.
21862 switch (PM_NODE_TYPE(node)) {
21863 case PM_MISSING_NODE:
21864 // If we found a syntax error, then the type of node returned by
21865 // parse_expression_prefix is going to be a missing node.
21866 return node;
// NOTE(review): the embedded numbering skips 21867-21871 here, so additional
// case labels that share the branch below appear to be missing from this
// listing; confirm against the upstream file.
21872 case PM_UNDEF_NODE:
21873 // These expressions are statements, and cannot be followed by
21874 // operators (except modifiers).
21875 if (pm_binding_powers[parser->current.type].left > PM_BINDING_POWER_MODIFIER) {
21876 return node;
21877 }
21878 break;
21879 case PM_CALL_NODE:
21880 // If we have a call node, then we need to check if it looks like a
21881 // method call without parentheses that contains arguments. If it
21882 // does, then it has different rules for parsing infix operators,
21883 // namely that it only accepts composition (and/or) and modifiers
21884 // (if/unless/etc.).
21885 if ((pm_binding_powers[parser->current.type].left > PM_BINDING_POWER_COMPOSITION) && pm_call_node_command_p((pm_call_node_t *) node)) {
21886 return node;
21887 }
21888 break;
21889 case PM_SYMBOL_NODE:
21890 // If we have a symbol node that is being parsed as a label, then we
21891 // need to immediately return, because there should never be an
21892 // infix operator following this node.
21893 if (pm_symbol_node_label_p(node)) {
21894 return node;
21895 }
// A symbol that is not a label falls through to the default case, which
// only breaks out of the switch.
21896 default:
21897 break;
21898 }
21899
21900 // Otherwise we'll look and see if the next token can be parsed as an infix
21901 // operator. If it can, then we'll parse it using parse_expression_infix.
21902 pm_binding_powers_t current_binding_powers;
21903 pm_token_type_t current_token_type;
21904
// The two comma-separated assignments refresh the token type and its binding
// powers on every iteration; only the final && expression is the loop test.
21905 while (
21906 current_token_type = parser->current.type,
21907 current_binding_powers = pm_binding_powers[current_token_type],
21908 binding_power <= current_binding_powers.left &&
21909 current_binding_powers.binary
21910 ) {
21911 node = parse_expression_infix(parser, node, binding_power, current_binding_powers.right, accepts_command_call, (uint16_t) (depth + 1));
21912
21913 switch (PM_NODE_TYPE(node)) {
// NOTE(review): the embedded numbering skips 21914 here; the case label that
// should introduce the statements below is missing from this listing.
21915 // Multi-write nodes are statements, and cannot be followed by
21916 // operators except modifiers.
21917 if (pm_binding_powers[parser->current.type].left > PM_BINDING_POWER_MODIFIER) {
21918 return node;
21919 }
21920 break;
// NOTE(review): the embedded numbering skips 21921-21926 here; the case labels
// for the write nodes handled below are missing from this listing.
21927 // These expressions are statements, by virtue of the right-hand
21928 // side of their write being an implicit array.
21929 if (PM_NODE_FLAG_P(node, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY) && pm_binding_powers[parser->current.type].left > PM_BINDING_POWER_MODIFIER) {
21930 return node;
21931 }
21932 break;
21933 case PM_CALL_NODE:
21934 // These expressions are also statements, by virtue of the
21935 // right-hand side of the expression (i.e., the last argument to
21936 // the call node) being an implicit array.
21937 if (PM_NODE_FLAG_P(node, PM_CALL_NODE_FLAGS_IMPLICIT_ARRAY) && pm_binding_powers[parser->current.type].left > PM_BINDING_POWER_MODIFIER) {
21938 return node;
21939 }
21940 break;
21941 default:
21942 break;
21943 }
21944
21945 // If the operator is nonassoc and we should not be able to parse the
21946 // upcoming infix operator, break.
21947 if (current_binding_powers.nonassoc) {
21948 // If this is a non-assoc operator and we are about to parse the
21949 // exact same operator, then we need to add an error.
21950 if (match1(parser, current_token_type)) {
21951 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_NON_ASSOCIATIVE_OPERATOR, pm_token_type_human(parser->current.type), pm_token_type_human(current_token_type));
21952 break;
21953 }
21954
21955 // If this is an endless range, then we need to reject a couple of
21956 // additional operators because it violates the normal operator
21957 // precedence rules. Those patterns are:
21958 //
21959 // 1.. & 2
21960 // 1.. * 2
21961 //
21962 if (PM_NODE_TYPE_P(node, PM_RANGE_NODE) && ((pm_range_node_t *) node)->right == NULL) {
// NOTE(review): the embedded numbering skips 21963 here; the inner `if` that
// opens the brace closed at 21966 (and that selects the rejected operators) is
// missing from this listing.
21964 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_NON_ASSOCIATIVE_OPERATOR, pm_token_type_human(parser->current.type), pm_token_type_human(current_token_type));
21965 break;
21966 }
21967
21968 if (PM_BINDING_POWER_TERM <= pm_binding_powers[parser->current.type].left) {
21969 break;
21970 }
21971 } else if (current_binding_powers.left <= pm_binding_powers[parser->current.type].left) {
21972 break;
21973 }
21974 }
21975
21976 if (accepts_command_call) {
21977 // A command-style method call is only accepted on method chains.
21978 // Thus, we check whether the parsed node can continue method chains.
21979 // The method chain can continue if the parsed node is one of the following five kinds:
21980 // (1) index access: foo[1]
21981 // (2) attribute access: foo.bar
21982 // (3) method call with parenthesis: foo.bar(1)
21983 // (4) method call with a block: foo.bar do end
21984 // (5) constant path: foo::Bar
// NOTE(review): this switch reads node->type directly, whereas the switches
// above go through PM_NODE_TYPE(node); confirm the two are interchangeable.
21985 switch (node->type) {
21986 case PM_CALL_NODE: {
21987 pm_call_node_t *cast = (pm_call_node_t *)node;
// The flag is cleared only when the call matches none of shapes (1)-(4).
21988 if (
21989 // (1) foo[1]
21990 !(
21991 cast->call_operator_loc.start == NULL &&
21992 cast->message_loc.start != NULL &&
21993 cast->message_loc.start[0] == '[' &&
21994 cast->message_loc.end[-1] == ']'
21995 ) &&
21996 // (2) foo.bar
21997 !(
21998 cast->call_operator_loc.start != NULL &&
21999 cast->arguments == NULL &&
22000 cast->block == NULL &&
22001 cast->opening_loc.start == NULL
22002 ) &&
22003 // (3) foo.bar(1)
22004 !(
22005 cast->call_operator_loc.start != NULL &&
22006 cast->opening_loc.start != NULL
22007 ) &&
22008 // (4) foo.bar do end
22009 !(
22010 cast->block != NULL && PM_NODE_TYPE_P(cast->block, PM_BLOCK_NODE)
22011 )
22012 ) {
22013 accepts_command_call = false;
22014 }
22015 break;
22016 }
22017 // (5) foo::Bar
// NOTE(review): the embedded numbering skips 22018 here; the case label for the
// constant path shape that owns the `break` below is missing from this listing.
22019 break;
22020 default:
22021 accepts_command_call = false;
22022 break;
22023 }
22024 }
22025 }
22026
22027 return node;
22028}
22029
22034static pm_statements_node_t *
22035wrap_statements(pm_parser_t *parser, pm_statements_node_t *statements) {
22036 if (PM_PARSER_COMMAND_LINE_OPTION_P(parser)) {
22037 pm_arguments_node_t *arguments = pm_arguments_node_create(parser);
22038 pm_arguments_node_arguments_append(
22039 arguments,
22040 (pm_node_t *) pm_global_variable_read_node_synthesized_create(parser, pm_parser_constant_id_constant(parser, "$_", 2))
22041 );
22042
22043 pm_statements_node_body_append(parser, statements, (pm_node_t *) pm_call_node_fcall_synthesized_create(
22044 parser,
22045 arguments,
22046 pm_parser_constant_id_constant(parser, "print", 5)
22047 ), true);
22048 }
22049
22050 if (PM_PARSER_COMMAND_LINE_OPTION_N(parser)) {
22051 if (PM_PARSER_COMMAND_LINE_OPTION_A(parser)) {
22052 pm_arguments_node_t *arguments = pm_arguments_node_create(parser);
22053 pm_arguments_node_arguments_append(
22054 arguments,
22055 (pm_node_t *) pm_global_variable_read_node_synthesized_create(parser, pm_parser_constant_id_constant(parser, "$;", 2))
22056 );
22057
22058 pm_global_variable_read_node_t *receiver = pm_global_variable_read_node_synthesized_create(parser, pm_parser_constant_id_constant(parser, "$_", 2));
22059 pm_call_node_t *call = pm_call_node_call_synthesized_create(parser, (pm_node_t *) receiver, "split", arguments);
22060
22061 pm_global_variable_write_node_t *write = pm_global_variable_write_node_synthesized_create(
22062 parser,
22063 pm_parser_constant_id_constant(parser, "$F", 2),
22064 (pm_node_t *) call
22065 );
22066
22067 pm_statements_node_body_prepend(statements, (pm_node_t *) write);
22068 }
22069
22070 pm_arguments_node_t *arguments = pm_arguments_node_create(parser);
22071 pm_arguments_node_arguments_append(
22072 arguments,
22073 (pm_node_t *) pm_global_variable_read_node_synthesized_create(parser, pm_parser_constant_id_constant(parser, "$/", 2))
22074 );
22075
22076 if (PM_PARSER_COMMAND_LINE_OPTION_L(parser)) {
22077 pm_keyword_hash_node_t *keywords = pm_keyword_hash_node_create(parser);
22078 pm_keyword_hash_node_elements_append(keywords, (pm_node_t *) pm_assoc_node_create(
22079 parser,
22080 (pm_node_t *) pm_symbol_node_synthesized_create(parser, "chomp"),
22081 &(pm_token_t) { .type = PM_TOKEN_NOT_PROVIDED, .start = parser->start, .end = parser->start },
22082 (pm_node_t *) pm_true_node_synthesized_create(parser)
22083 ));
22084
22085 pm_arguments_node_arguments_append(arguments, (pm_node_t *) keywords);
22086 pm_node_flag_set((pm_node_t *) arguments, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS);
22087 }
22088
22089 pm_statements_node_t *wrapped_statements = pm_statements_node_create(parser);
22090 pm_statements_node_body_append(parser, wrapped_statements, (pm_node_t *) pm_while_node_synthesized_create(
22091 parser,
22092 (pm_node_t *) pm_call_node_fcall_synthesized_create(parser, arguments, pm_parser_constant_id_constant(parser, "gets", 4)),
22093 statements
22094 ), true);
22095
22096 statements = wrapped_statements;
22097 }
22098
22099 return statements;
22100}
22101
/**
 * Parse the top level of the program and return a program node built from
 * the ordered locals of the outermost scope and the parsed statements.
 *
 * A top-level scope is pushed when the parser has none, the first token is
 * lexed, and the statements are parsed in PM_CONTEXT_MAIN. When nothing was
 * parsed, an empty statements node located at parser->start stands in.
 */
22105static pm_node_t *
22106parse_program(pm_parser_t *parser) {
22107 // If the current scope is NULL, then we want to push a new top level scope.
22108 // The current scope could exist in the event that we are parsing an eval
22109 // and the user has passed into scopes that already exist.
22110 if (parser->current_scope == NULL) {
22111 pm_parser_scope_push(parser, true);
22112 }
22113
// Collect block exits seen while parsing into a local list; the previous list
// is kept so it can be handed to flush_block_exits below.
22114 pm_node_list_t current_block_exits = { 0 };
22115 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
22116
22117 parser_lex(parser);
22118 pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_MAIN, 0);
22119
22120 if (statements == NULL) {
22121 statements = pm_statements_node_create(parser);
22122 } else if (!parser->parsing_eval) {
22123 // If we have statements, then the top-level statement should be
22124 // explicitly checked as well. We have to do this here because
22125 // everywhere else we check all but the last statement.
22126 assert(statements->body.size > 0);
22127 pm_void_statement_check(parser, statements->body.nodes[statements->body.size - 1]);
22128 }
22129
// Order the locals of the current scope before popping it; the ordered list is
// what the program node receives.
22130 pm_constant_id_list_t locals;
22131 pm_locals_order(parser, &parser->current_scope->locals, &locals, true);
22132 pm_parser_scope_pop(parser);
22133
22134 // If this is an empty file, then we're still going to parse all of the
22135 // statements in order to gather up all of the comments and such. Here we'll
22136 // correct the location information.
22137 if (pm_statements_node_body_length(statements) == 0) {
22138 pm_statements_node_location_set(statements, parser->start, parser->start);
22139 }
22140
22141 // At the top level, see if we need to wrap the statements in a program
22142 // node with a while loop based on the options.
// NOTE(review): the embedded numbering skips 22143 here; the `if (...) {` whose
// else-branch follows is missing from this listing, so the condition under
// which wrap_statements runs cannot be read from this block.
22144 statements = wrap_statements(parser, statements);
22145 } else {
22146 flush_block_exits(parser, previous_block_exits);
22147 pm_node_list_free(&current_block_exits);
22148 }
22149
22150 return (pm_node_t *) pm_program_node_create(parser, &locals, statements);
22151}
22152
22153/******************************************************************************/
22154/* External functions */
22155/******************************************************************************/
22156
/**
 * Find the first occurrence of the NUL-terminated string `little` that lies
 * entirely within the first `big_length` bytes of `big`.
 *
 * Unlike strstr, `big` does not need to be NUL-terminated: no byte at or
 * beyond `big + big_length` is ever read.
 *
 * @param big The buffer to search.
 * @param little The NUL-terminated string to search for.
 * @param big_length The number of readable bytes in `big`.
 * @return A pointer to the start of the match, `big` itself when `little` is
 *     empty (the strstr convention), or NULL when there is no match.
 */
static const char *
pm_strnstr(const char *big, const char *little, size_t big_length) {
    size_t little_length = strlen(little);

    // An empty needle matches at the very beginning.
    if (little_length == 0) return big;

    // A needle longer than the window cannot fit anywhere inside it. This
    // also keeps the subtraction below from wrapping around.
    if (little_length > big_length) return NULL;

    // Only start positions that leave room for the whole needle are tried, so
    // memcmp never reads past the end of the window.
    const char *last_start = big + (big_length - little_length);

    for (; big <= last_start; big++) {
        if (*big == *little && memcmp(big, little, little_length) == 0) return big;
    }

    return NULL;
}
22176
22177#ifdef _WIN32
22178#define pm_parser_warn_shebang_carriage_return(parser, start, length) ((void) 0)
22179#else
22185static void
22186pm_parser_warn_shebang_carriage_return(pm_parser_t *parser, const uint8_t *start, size_t length) {
22187 if (length > 2 && start[length - 2] == '\r' && start[length - 1] == '\n') {
22188 pm_parser_warn(parser, start, start + length, PM_WARN_SHEBANG_CARRIAGE_RETURN);
22189 }
22190}
22191#endif
22192
22197static void
22198pm_parser_init_shebang(pm_parser_t *parser, const pm_options_t *options, const char *engine, size_t length) {
22199 const char *switches = pm_strnstr(engine, " -", length);
22200 if (switches == NULL) return;
22201
22202 pm_options_t next_options = *options;
22203 options->shebang_callback(
22204 &next_options,
22205 (const uint8_t *) (switches + 1),
22206 length - ((size_t) (switches - engine)) - 1,
22207 options->shebang_callback_data
22208 );
22209
22210 size_t encoding_length;
22211 if ((encoding_length = pm_string_length(&next_options.encoding)) > 0) {
22212 const uint8_t *encoding_source = pm_string_source(&next_options.encoding);
22213 parser_lex_magic_comment_encoding_value(parser, encoding_source, encoding_source + encoding_length);
22214 }
22215
22216 parser->command_line = next_options.command_line;
22217 parser->frozen_string_literal = next_options.frozen_string_literal;
22218}
22219
/**
 * Initialize a parser over the `size` bytes starting at `source`.
 *
 * Every field of *parser is reset, the constant pool and newline list are
 * sized from `size`, and, when `options` is non-NULL, its settings and scopes
 * are copied onto the parser. A leading UTF-8 BOM is skipped, and a shebang on
 * the first line (or, when searching is enabled, on a later line) determines
 * where parsing and the encoding comment start.
 *
 * parser  - the parser to initialize.
 * source  - the bytes to parse; must not be NULL (asserted).
 * size    - the number of bytes at `source`.
 * options - optional settings; may be NULL (but see the review note in the
 *           shebang handling below).
 *
 * NOTE(review): the line that should carry this definition's return type
 * (embedded number 22223) is absent from this listing.
 */
22224pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm_options_t *options) {
22225 assert(source != NULL);
22226
22227 *parser = (pm_parser_t) {
22228 .node_id = 0,
22229 .lex_state = PM_LEX_STATE_BEG,
22230 .enclosure_nesting = 0,
22231 .lambda_enclosure_nesting = -1,
22232 .brace_nesting = 0,
22233 .do_loop_stack = 0,
22234 .accepts_block_stack = 0,
22235 .lex_modes = {
22236 .index = 0,
22237 .stack = {{ .mode = PM_LEX_DEFAULT }},
22238 .current = &parser->lex_modes.stack[0],
22239 },
22240 .start = source,
22241 .end = source + size,
22242 .previous = { .type = PM_TOKEN_EOF, .start = source, .end = source },
22243 .current = { .type = PM_TOKEN_EOF, .start = source, .end = source },
22244 .next_start = NULL,
22245 .heredoc_end = NULL,
22246 .data_loc = { .start = NULL, .end = NULL },
22247 .comment_list = { 0 },
22248 .magic_comment_list = { 0 },
22249 .warning_list = { 0 },
22250 .error_list = { 0 },
22251 .current_scope = NULL,
22252 .current_context = NULL,
22253 .encoding = PM_ENCODING_UTF_8_ENTRY,
22254 .encoding_changed_callback = NULL,
22255 .encoding_comment_start = source,
22256 .lex_callback = NULL,
22257 .filepath = { 0 },
22258 .constant_pool = { 0 },
22259 .newline_list = { 0 },
22260 .integer_base = 0,
22261 .current_string = PM_STRING_EMPTY,
22262 .start_line = 1,
22263 .explicit_encoding = NULL,
22264 .command_line = 0,
22265 .parsing_eval = false,
22266 .partial_script = false,
22267 .command_start = true,
22268 .recovering = false,
22269 .encoding_locked = false,
22270 .encoding_changed = false,
22271 .pattern_matching_newlines = false,
22272 .in_keyword_arg = false,
22273 .current_block_exits = NULL,
22274 .semantic_token_seen = false,
22275 .frozen_string_literal = PM_OPTIONS_FROZEN_STRING_LITERAL_UNSET,
22276 .current_regular_expression_ascii_only = false,
22277 .warn_mismatched_indentation = true
22278 };
22279
22280 // Initialize the constant pool. We're going to completely guess as to the
22281 // number of constants that we'll need based on the size of the input. The
22282 // ratio we chose here is actually less arbitrary than you might think.
22283 //
22284 // We took ~50K Ruby files and measured the size of the file versus the
22285 // number of constants that were found in those files. Then we found the
22286 // average and standard deviation of the ratios of constants/bytesize. Then
22287 // we added 1.34 standard deviations to the average to get a ratio that
22288 // would fit 75% of the files (for a two-tailed distribution). This works
22289 // because there was about a 0.77 correlation and the distribution was
22290 // roughly normal.
22291 //
22292 // This ratio will need to change if we add more constants to the constant
22293 // pool for another node type.
// The estimate is clamped to a minimum of 4 entries.
22294 uint32_t constant_size = ((uint32_t) size) / 95;
22295 pm_constant_pool_init(&parser->constant_pool, constant_size < 4 ? 4 : constant_size);
22296
22297 // Initialize the newline list. Similar to the constant pool, we're going to
22298 // guess at the number of newlines that we'll need based on the size of the
22299 // input.
22300 size_t newline_size = size / 22;
22301 pm_newline_list_init(&parser->newline_list, source, newline_size < 4 ? 4 : newline_size);
22302
22303 // If options were provided to this parse, establish them here.
22304 if (options != NULL) {
22305 // filepath option
22306 parser->filepath = options->filepath;
22307
22308 // line option
22309 parser->start_line = options->line;
22310
22311 // encoding option
22312 size_t encoding_length = pm_string_length(&options->encoding);
22313 if (encoding_length > 0) {
22314 const uint8_t *encoding_source = pm_string_source(&options->encoding);
22315 parser_lex_magic_comment_encoding_value(parser, encoding_source, encoding_source + encoding_length);
22316 }
22317
22318 // encoding_locked option
22319 parser->encoding_locked = options->encoding_locked;
22320
22321 // frozen_string_literal option
// NOTE(review): no statement follows this heading (embedded number 22322 is
// skipped), so the assignment it announces is missing from this listing.
22323
22324 // command_line option
22325 parser->command_line = options->command_line;
22326
22327 // version option
22328 parser->version = options->version;
22329
22330 // partial_script
22331 parser->partial_script = options->partial_script;
22332
22333 // scopes option
// Being handed any scopes marks this parse as an eval, which also switches off
// the mismatched indentation warning.
22334 parser->parsing_eval = options->scopes_count > 0;
22335 if (parser->parsing_eval) parser->warn_mismatched_indentation = false;
22336
22337 for (size_t scope_index = 0; scope_index < options->scopes_count; scope_index++) {
22338 const pm_options_scope_t *scope = pm_options_scope_get(options, scope_index);
22339 pm_parser_scope_push(parser, scope_index == 0);
22340
22341 // Scopes given from the outside are not allowed to have numbered
22342 // parameters.
22343 parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_IMPLICIT_DISALLOWED;
22344
22345 for (size_t local_index = 0; local_index < scope->locals_count; local_index++) {
22346 const pm_string_t *local = pm_options_scope_local_get(scope, local_index);
22347
22348 const uint8_t *source = pm_string_source(local);
22349 size_t length = pm_string_length(local);
22350
// Each local name is copied into its own allocation before being handed to
// pm_parser_local_add_owned; a local whose allocation fails is skipped.
22351 void *allocated = xmalloc(length);
22352 if (allocated == NULL) continue;
22353
22354 memcpy(allocated, source, length);
22355 pm_parser_local_add_owned(parser, (uint8_t *) allocated, length);
22356 }
22357 }
22358 }
22359
22360 pm_accepts_block_stack_push(parser, true);
22361
22362 // Skip past the UTF-8 BOM if it exists.
22363 if (size >= 3 && source[0] == 0xef && source[1] == 0xbb && source[2] == 0xbf) {
22364 parser->current.end += 3;
22365 parser->encoding_comment_start += 3;
22366
22367 if (parser->encoding != PM_ENCODING_UTF_8_ENTRY) {
// NOTE(review): the embedded numbering skips 22368 here; whatever statement
// preceded the callback below is missing from this listing.
22369 if (parser->encoding_changed_callback != NULL) parser->encoding_changed_callback(parser);
22370 }
22371 }
22372
22373 // If the -x command line flag is set, or the first shebang of the file does
22374 // not include "ruby", then we'll search for a shebang that does include
22375 // "ruby" and start parsing from there.
22376 bool search_shebang = PM_PARSER_COMMAND_LINE_OPTION_X(parser);
22377
22378 // If the first two bytes of the source are a shebang, then we will do a bit
22379 // of extra processing.
22380 //
22381 // First, we'll indicate that the encoding comment is at the end of the
22382 // shebang. This means that when a shebang is present the encoding comment
22383 // can begin on the second line.
22384 //
22385 // Second, we will check if the shebang includes "ruby". If it does, then
22386 // we will start parsing from there. We will also potentially warn the
22387 // user if there is a carriage return at the end of the shebang. We will
22388 // also potentially call the shebang callback if this is the main script to
22389 // allow the caller to parse the shebang and find any command-line options.
22390 // If the shebang does not include "ruby" and this is the main script being
22391 // parsed, then we will start searching the file for a shebang that does
22392 // contain "ruby" as if -x were passed on the command line.
22393 const uint8_t *newline = next_newline(parser->start, parser->end - parser->start);
22394 size_t length = (size_t) ((newline != NULL ? newline : parser->end) - parser->start);
22395
// NOTE(review): `length` is measured from parser->start, while the two bytes
// tested are at parser->current.end, which is 3 bytes further along when a BOM
// was skipped; confirm these reads stay in bounds for very short inputs.
22396 if (length > 2 && parser->current.end[0] == '#' && parser->current.end[1] == '!') {
22397 const char *engine;
22398
22399 if ((engine = pm_strnstr((const char *) parser->start, "ruby", length)) != NULL) {
22400 if (newline != NULL) {
22401 parser->encoding_comment_start = newline + 1;
22402
22403 if (options == NULL || options->main_script) {
22404 pm_parser_warn_shebang_carriage_return(parser, parser->start, length + 1);
22405 }
22406 }
22407
22408 if (options != NULL && options->main_script && options->shebang_callback != NULL) {
22409 pm_parser_init_shebang(parser, options, engine, length - ((size_t) (engine - (const char *) parser->start)));
22410 }
22411
22412 search_shebang = false;
// NOTE(review): `options` is dereferenced below without the NULL check used by
// the two conditions above. With options == NULL and a first-line shebang that
// does not contain "ruby", this branch is reached and reads through a NULL
// pointer.
22413 } else if (options->main_script && !parser->parsing_eval) {
22414 search_shebang = true;
22415 }
22416 }
22417
22418 // Here we're going to find the first shebang that includes "ruby" and start
22419 // parsing from there.
22420 if (search_shebang) {
22421 // If a shebang that includes "ruby" is not found, then we're going to
22422 // add a load error to the list of errors on the parser.
22423 bool found_shebang = false;
22424
22425 // This is going to point to the start of each line as we check it.
22426 // We'll maintain a moving window looking at each line as they come.
22427 const uint8_t *cursor = parser->start;
22428
22429 // The newline pointer points to the end of the current line that we're
22430 // considering. If it is NULL, then we're at the end of the file.
22431 const uint8_t *newline = next_newline(cursor, parser->end - cursor);
22432
// Each iteration records the newline that ends the previous line and then
// inspects the line after it, so the first line is never examined here.
22433 while (newline != NULL) {
22434 pm_newline_list_append(&parser->newline_list, newline);
22435
22436 cursor = newline + 1;
22437 newline = next_newline(cursor, parser->end - cursor);
22438
22439 size_t length = (size_t) ((newline != NULL ? newline : parser->end) - cursor);
22440 if (length > 2 && cursor[0] == '#' && cursor[1] == '!') {
22441 const char *engine;
22442 if ((engine = pm_strnstr((const char *) cursor, "ruby", length)) != NULL) {
22443 found_shebang = true;
22444
22445 if (newline != NULL) {
22446 pm_parser_warn_shebang_carriage_return(parser, cursor, length + 1);
22447 parser->encoding_comment_start = newline + 1;
22448 }
22449
22450 if (options != NULL && options->shebang_callback != NULL) {
22451 pm_parser_init_shebang(parser, options, engine, length - ((size_t) (engine - (const char *) cursor)));
22452 }
22453
22454 break;
22455 }
22456 }
22457 }
22458
// On success both tokens are reset to an empty EOF token at the shebang line,
// so lexing starts there; otherwise an error is recorded and the newlines
// gathered during the search are discarded.
22459 if (found_shebang) {
22460 parser->previous = (pm_token_t) { .type = PM_TOKEN_EOF, .start = cursor, .end = cursor };
22461 parser->current = (pm_token_t) { .type = PM_TOKEN_EOF, .start = cursor, .end = cursor };
22462 } else {
22463 pm_parser_err(parser, parser->start, parser->start, PM_ERR_SCRIPT_NOT_FOUND);
22464 pm_newline_list_clear(&parser->newline_list);
22465 }
22466 }
22467
22468 // The encoding comment can start after any amount of inline whitespace, so
22469 // here we'll advance it to the first non-inline-whitespace character so
22470 // that it is ready for future comparisons.
22471 parser->encoding_comment_start += pm_strspn_inline_whitespace(parser->encoding_comment_start, parser->end - parser->encoding_comment_start);
22472}
22473
/**
 * Store `callback` on the parser as its encoding_changed_callback, replacing
 * any callback stored previously. pm_parser_init invokes this field, when it
 * is non-NULL, from its BOM handling.
 *
 * NOTE(review): the line that should carry this definition's return type
 * (embedded number 22478) is absent from this listing.
 */
22479pm_parser_register_encoding_changed_callback(pm_parser_t *parser, pm_encoding_changed_callback_t callback) {
22480 parser->encoding_changed_callback = callback;
22481}
22482
22486static inline void
22487pm_comment_list_free(pm_list_t *list) {
22488 pm_list_node_t *node, *next;
22489
22490 for (node = list->head; node != NULL; node = next) {
22491 next = node->next;
22492
22493 pm_comment_t *comment = (pm_comment_t *) node;
22494 xfree(comment);
22495 }
22496}
22497
22501static inline void
22502pm_magic_comment_list_free(pm_list_t *list) {
22503 pm_list_node_t *node, *next;
22504
22505 for (node = list->head; node != NULL; node = next) {
22506 next = node->next;
22507
22510 }
22511}
22512
/**
 * Release the resources held by the parser: its filepath string, error and
 * warning lists, comment and magic comment lists, constant pool and newline
 * list, followed by any scopes and overflowed lex modes still in place. The
 * pm_parser_t structure itself is not freed here.
 *
 * NOTE(review): the line that should carry this definition's return type
 * (embedded number 22516) is absent from this listing.
 */
22517pm_parser_free(pm_parser_t *parser) {
22518 pm_string_free(&parser->filepath);
22519 pm_diagnostic_list_free(&parser->error_list);
22520 pm_diagnostic_list_free(&parser->warning_list);
22521 pm_comment_list_free(&parser->comment_list);
22522 pm_magic_comment_list_free(&parser->magic_comment_list);
22523 pm_constant_pool_free(&parser->constant_pool);
22524 pm_newline_list_free(&parser->newline_list);
22525
22526 while (parser->current_scope != NULL) {
22527 // Normally, popping the scope doesn't free the locals since it is
22528 // assumed that ownership has transferred to the AST. However if we have
22529 // scopes while we're freeing the parser, it's likely they came from
22530 // eval scopes and we need to free them explicitly here.
22531 pm_parser_scope_pop(parser);
22532 }
22533
// Lex modes at an index of PM_LEX_STACK_SIZE or more do not fit in the inline
// stack array (see lex_mode_push); pop them one by one. Presumably popping
// releases their storage; confirm in lex_mode_pop.
22534 while (parser->lex_modes.index >= PM_LEX_STACK_SIZE) {
22535 lex_mode_pop(parser);
22536 }
22537}
22538
/**
 * Parse the source held by the parser and return the resulting node. This is
 * a thin entry point that delegates to parse_program.
 *
 * NOTE(review): the line that should carry this definition's return type
 * (embedded number 22542) is absent from this listing.
 */
22543pm_parse(pm_parser_t *parser) {
22544 return parse_program(parser);
22545}
22546
22552static bool
22553pm_parse_stream_read(pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *fgets) {
22554#define LINE_SIZE 4096
22555 char line[LINE_SIZE];
22556
22557 while (memset(line, '\n', LINE_SIZE), fgets(line, LINE_SIZE, stream) != NULL) {
22558 size_t length = LINE_SIZE;
22559 while (length > 0 && line[length - 1] == '\n') length--;
22560
22561 if (length == LINE_SIZE) {
22562 // If we read a line that is the maximum size and it doesn't end
22563 // with a newline, then we'll just append it to the buffer and
22564 // continue reading.
22565 length--;
22566 pm_buffer_append_string(buffer, line, length);
22567 continue;
22568 }
22569
22570 // Append the line to the buffer.
22571 length--;
22572 pm_buffer_append_string(buffer, line, length);
22573
22574 // Check if the line matches the __END__ marker. If it does, then stop
22575 // reading and return false. In most circumstances, this means we should
22576 // stop reading from the stream so that the DATA constant can pick it
22577 // up.
22578 switch (length) {
22579 case 7:
22580 if (strncmp(line, "__END__", 7) == 0) return false;
22581 break;
22582 case 8:
22583 if (strncmp(line, "__END__\n", 8) == 0) return false;
22584 break;
22585 case 9:
22586 if (strncmp(line, "__END__\r\n", 9) == 0) return false;
22587 break;
22588 }
22589 }
22590
22591 return true;
22592#undef LINE_SIZE
22593}
22594
22604static bool
22605pm_parse_stream_unterminated_heredoc_p(pm_parser_t *parser) {
22606 pm_diagnostic_t *diagnostic = (pm_diagnostic_t *) parser->error_list.head;
22607
22608 for (; diagnostic != NULL; diagnostic = (pm_diagnostic_t *) diagnostic->node.next) {
22609 if (diagnostic->diag_id == PM_ERR_HEREDOC_TERM) {
22610 return true;
22611 }
22612 }
22613
22614 return false;
22615}
22616
22624pm_parse_stream(pm_parser_t *parser, pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *fgets, const pm_options_t *options) {
22625 pm_buffer_init(buffer);
22626
22627 bool eof = pm_parse_stream_read(buffer, stream, fgets);
22628 pm_parser_init(parser, (const uint8_t *) pm_buffer_value(buffer), pm_buffer_length(buffer), options);
22629 pm_node_t *node = pm_parse(parser);
22630
22631 while (!eof && parser->error_list.size > 0 && (parser->lex_modes.index > 0 || pm_parse_stream_unterminated_heredoc_p(parser))) {
22632 pm_node_destroy(parser, node);
22633 eof = pm_parse_stream_read(buffer, stream, fgets);
22634
22635 pm_parser_free(parser);
22636 pm_parser_init(parser, (const uint8_t *) pm_buffer_value(buffer), pm_buffer_length(buffer), options);
22637 node = pm_parse(parser);
22638 }
22639
22640 return node;
22641}
22642
22647pm_parse_success_p(const uint8_t *source, size_t size, const char *data) {
22648 pm_options_t options = { 0 };
22649 pm_options_read(&options, data);
22650
22651 pm_parser_t parser;
22652 pm_parser_init(&parser, source, size, &options);
22653
22654 pm_node_t *node = pm_parse(&parser);
22655 pm_node_destroy(&parser, node);
22656
22657 bool result = parser.error_list.size == 0;
22658 pm_parser_free(&parser);
22659 pm_options_free(&options);
22660
22661 return result;
22662}
22663
22664#undef PM_CASE_KEYWORD
22665#undef PM_CASE_OPERATOR
22666#undef PM_CASE_WRITABLE
22667#undef PM_STRING_EMPTY
22668#undef PM_LOCATION_NODE_BASE_VALUE
22669#undef PM_LOCATION_NODE_VALUE
22670#undef PM_LOCATION_NULL_VALUE
22671#undef PM_LOCATION_TOKEN_VALUE
22672
22673// We optionally support serializing to a binary string. For systems that don't
22674// want or need this functionality, it can be turned off with the
22675// PRISM_EXCLUDE_SERIALIZATION define.
22676#ifndef PRISM_EXCLUDE_SERIALIZATION
22677
22678static inline void
22679pm_serialize_header(pm_buffer_t *buffer) {
22680 pm_buffer_append_string(buffer, "PRISM", 5);
22681 pm_buffer_append_byte(buffer, PRISM_VERSION_MAJOR);
22682 pm_buffer_append_byte(buffer, PRISM_VERSION_MINOR);
22683 pm_buffer_append_byte(buffer, PRISM_VERSION_PATCH);
22684 pm_buffer_append_byte(buffer, PRISM_SERIALIZE_ONLY_SEMANTICS_FIELDS ? 1 : 0);
22685}
22686
22691pm_serialize(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
22692 pm_serialize_header(buffer);
22693 pm_serialize_content(parser, node, buffer);
22694 pm_buffer_append_byte(buffer, '\0');
22695}
22696
22702pm_serialize_parse(pm_buffer_t *buffer, const uint8_t *source, size_t size, const char *data) {
22703 pm_options_t options = { 0 };
22704 pm_options_read(&options, data);
22705
22706 pm_parser_t parser;
22707 pm_parser_init(&parser, source, size, &options);
22708
22709 pm_node_t *node = pm_parse(&parser);
22710
22711 pm_serialize_header(buffer);
22712 pm_serialize_content(&parser, node, buffer);
22713 pm_buffer_append_byte(buffer, '\0');
22714
22715 pm_node_destroy(&parser, node);
22716 pm_parser_free(&parser);
22717 pm_options_free(&options);
22718}
22719
22725pm_serialize_parse_stream(pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *fgets, const char *data) {
22726 pm_parser_t parser;
22727 pm_options_t options = { 0 };
22728 pm_options_read(&options, data);
22729
22730 pm_buffer_t parser_buffer;
22731 pm_node_t *node = pm_parse_stream(&parser, &parser_buffer, stream, fgets, &options);
22732 pm_serialize_header(buffer);
22733 pm_serialize_content(&parser, node, buffer);
22734 pm_buffer_append_byte(buffer, '\0');
22735
22736 pm_node_destroy(&parser, node);
22737 pm_buffer_free(&parser_buffer);
22738 pm_parser_free(&parser);
22739 pm_options_free(&options);
22740}
22741
22746pm_serialize_parse_comments(pm_buffer_t *buffer, const uint8_t *source, size_t size, const char *data) {
22747 pm_options_t options = { 0 };
22748 pm_options_read(&options, data);
22749
22750 pm_parser_t parser;
22751 pm_parser_init(&parser, source, size, &options);
22752
22753 pm_node_t *node = pm_parse(&parser);
22754 pm_serialize_header(buffer);
22755 pm_serialize_encoding(parser.encoding, buffer);
22756 pm_buffer_append_varsint(buffer, parser.start_line);
22757 pm_serialize_comment_list(&parser, &parser.comment_list, buffer);
22758
22759 pm_node_destroy(&parser, node);
22760 pm_parser_free(&parser);
22761 pm_options_free(&options);
22762}
22763
22764#endif
22765
22766/******************************************************************************/
22767/* Slice queries for the Ruby API */
22768/******************************************************************************/
22769
/** The classification that pm_slice_type assigns to a slice of source. */
typedef enum {
    /** Returned when the requested encoding could not be found. */
    PM_SLICE_TYPE_ERROR = -1,

    /** Returned when the slice is not a valid identifier. */
    PM_SLICE_TYPE_NONE = 0,

    /** Returned when the slice is an identifier whose first character is not uppercase. */
    PM_SLICE_TYPE_LOCAL = 1,

    /** Returned when the slice is an identifier whose first character is uppercase. */
    PM_SLICE_TYPE_CONSTANT = 2,

    /** Returned when the slice is an identifier followed by a trailing !, ?, or =. */
    PM_SLICE_TYPE_METHOD_NAME = 3
} pm_slice_type_t;
22787
22791pm_slice_type_t
22792pm_slice_type(const uint8_t *source, size_t length, const char *encoding_name) {
22793 // first, get the right encoding object
22794 const pm_encoding_t *encoding = pm_encoding_find((const uint8_t *) encoding_name, (const uint8_t *) (encoding_name + strlen(encoding_name)));
22795 if (encoding == NULL) return PM_SLICE_TYPE_ERROR;
22796
22797 // check that there is at least one character
22798 if (length == 0) return PM_SLICE_TYPE_NONE;
22799
22800 size_t width;
22801 if ((width = encoding->alpha_char(source, (ptrdiff_t) length)) != 0) {
22802 // valid because alphabetical
22803 } else if (*source == '_') {
22804 // valid because underscore
22805 width = 1;
22806 } else if ((*source >= 0x80) && ((width = encoding->char_width(source, (ptrdiff_t) length)) > 0)) {
22807 // valid because multibyte
22808 } else {
22809 // invalid because no match
22810 return PM_SLICE_TYPE_NONE;
22811 }
22812
22813 // determine the type of the slice based on the first character
22814 const uint8_t *end = source + length;
22815 pm_slice_type_t result = encoding->isupper_char(source, end - source) ? PM_SLICE_TYPE_CONSTANT : PM_SLICE_TYPE_LOCAL;
22816
22817 // next, iterate through all of the bytes of the string to ensure that they
22818 // are all valid identifier characters
22819 source += width;
22820
22821 while (source < end) {
22822 if ((width = encoding->alnum_char(source, end - source)) != 0) {
22823 // valid because alphanumeric
22824 source += width;
22825 } else if (*source == '_') {
22826 // valid because underscore
22827 source++;
22828 } else if ((*source >= 0x80) && ((width = encoding->char_width(source, end - source)) > 0)) {
22829 // valid because multibyte
22830 source += width;
22831 } else {
22832 // invalid because no match
22833 break;
22834 }
22835 }
22836
22837 // accept a ! or ? at the end of the slice as a method name
22838 if (*source == '!' || *source == '?' || *source == '=') {
22839 source++;
22840 result = PM_SLICE_TYPE_METHOD_NAME;
22841 }
22842
22843 // valid if we are at the end of the slice
22844 return source == end ? result : PM_SLICE_TYPE_NONE;
22845}
22846
22851pm_string_query_local(const uint8_t *source, size_t length, const char *encoding_name) {
22852 switch (pm_slice_type(source, length, encoding_name)) {
22853 case PM_SLICE_TYPE_ERROR:
22854 return PM_STRING_QUERY_ERROR;
22855 case PM_SLICE_TYPE_NONE:
22856 case PM_SLICE_TYPE_CONSTANT:
22857 case PM_SLICE_TYPE_METHOD_NAME:
22858 return PM_STRING_QUERY_FALSE;
22859 case PM_SLICE_TYPE_LOCAL:
22860 return PM_STRING_QUERY_TRUE;
22861 }
22862
22863 assert(false && "unreachable");
22864 return PM_STRING_QUERY_FALSE;
22865}
22866
22871pm_string_query_constant(const uint8_t *source, size_t length, const char *encoding_name) {
22872 switch (pm_slice_type(source, length, encoding_name)) {
22873 case PM_SLICE_TYPE_ERROR:
22874 return PM_STRING_QUERY_ERROR;
22875 case PM_SLICE_TYPE_NONE:
22876 case PM_SLICE_TYPE_LOCAL:
22877 case PM_SLICE_TYPE_METHOD_NAME:
22878 return PM_STRING_QUERY_FALSE;
22879 case PM_SLICE_TYPE_CONSTANT:
22880 return PM_STRING_QUERY_TRUE;
22881 }
22882
22883 assert(false && "unreachable");
22884 return PM_STRING_QUERY_FALSE;
22885}
22886
22891pm_string_query_method_name(const uint8_t *source, size_t length, const char *encoding_name) {
22892#define B(p) ((p) ? PM_STRING_QUERY_TRUE : PM_STRING_QUERY_FALSE)
22893#define C1(c) (*source == c)
22894#define C2(s) (memcmp(source, s, 2) == 0)
22895#define C3(s) (memcmp(source, s, 3) == 0)
22896
22897 switch (pm_slice_type(source, length, encoding_name)) {
22898 case PM_SLICE_TYPE_ERROR:
22899 return PM_STRING_QUERY_ERROR;
22900 case PM_SLICE_TYPE_NONE:
22901 break;
22902 case PM_SLICE_TYPE_LOCAL:
22903 // numbered parameters are not valid method names
22904 return B((length != 2) || (source[0] != '_') || (source[1] == '0') || !pm_char_is_decimal_digit(source[1]));
22905 case PM_SLICE_TYPE_CONSTANT:
22906 // all constants are valid method names
22907 case PM_SLICE_TYPE_METHOD_NAME:
22908 // all method names are valid method names
22909 return PM_STRING_QUERY_TRUE;
22910 }
22911
22912 switch (length) {
22913 case 1:
22914 return B(C1('&') || C1('`') || C1('!') || C1('^') || C1('>') || C1('<') || C1('-') || C1('%') || C1('|') || C1('+') || C1('/') || C1('*') || C1('~'));
22915 case 2:
22916 return B(C2("!=") || C2("!~") || C2("[]") || C2("==") || C2("=~") || C2(">=") || C2(">>") || C2("<=") || C2("<<") || C2("**"));
22917 case 3:
22918 return B(C3("===") || C3("<=>") || C3("[]="));
22919 default:
22920 return PM_STRING_QUERY_FALSE;
22921 }
22922
22923#undef B
22924#undef C1
22925#undef C2
22926#undef C3
22927}
struct pm_block_parameter_node pm_block_parameter_node_t
BlockParameterNode.
struct pm_else_node pm_else_node_t
ElseNode.
struct pm_assoc_node pm_assoc_node_t
AssocNode.
struct pm_undef_node pm_undef_node_t
UndefNode.
struct pm_local_variable_target_node pm_local_variable_target_node_t
LocalVariableTargetNode.
struct pm_block_node pm_block_node_t
BlockNode.
struct pm_hash_pattern_node pm_hash_pattern_node_t
HashPatternNode.
struct pm_optional_parameter_node pm_optional_parameter_node_t
OptionalParameterNode.
struct pm_x_string_node pm_x_string_node_t
XStringNode.
struct pm_class_variable_write_node pm_class_variable_write_node_t
ClassVariableWriteNode.
struct pm_interpolated_string_node pm_interpolated_string_node_t
InterpolatedStringNode.
struct pm_call_node pm_call_node_t
CallNode.
struct pm_class_variable_read_node pm_class_variable_read_node_t
ClassVariableReadNode.
@ PM_RANGE_FLAGS_EXCLUDE_END
... operator
Definition ast.h:7854
struct pm_local_variable_read_node pm_local_variable_read_node_t
LocalVariableReadNode.
struct pm_arguments_node pm_arguments_node_t
ArgumentsNode.
@ PM_DEFINED_NODE
DefinedNode.
Definition ast.h:709
@ PM_PRE_EXECUTION_NODE
PreExecutionNode.
Definition ast.h:931
@ PM_RETRY_NODE
RetryNode.
Definition ast.h:964
@ PM_REDO_NODE
RedoNode.
Definition ast.h:943
@ PM_CONSTANT_PATH_WRITE_NODE
ConstantPathWriteNode.
Definition ast.h:694
@ PM_SOURCE_LINE_NODE
SourceLineNode.
Definition ast.h:985
@ PM_UNLESS_NODE
UnlessNode.
Definition ast.h:1009
@ PM_CALL_NODE
CallNode.
Definition ast.h:628
@ PM_NIL_NODE
NilNode.
Definition ast.h:895
@ PM_GLOBAL_VARIABLE_READ_NODE
GlobalVariableReadNode.
Definition ast.h:757
@ PM_RATIONAL_NODE
RationalNode.
Definition ast.h:940
@ PM_FIND_PATTERN_NODE
FindPatternNode.
Definition ast.h:727
@ PM_ARRAY_NODE
ArrayNode.
Definition ast.h:589
@ PM_CONSTANT_PATH_TARGET_NODE
ConstantPathTargetNode.
Definition ast.h:691
@ PM_OR_NODE
OrNode.
Definition ast.h:913
@ PM_MULTI_WRITE_NODE
MultiWriteNode.
Definition ast.h:889
@ PM_IF_NODE
IfNode.
Definition ast.h:772
@ PM_INTERPOLATED_STRING_NODE
InterpolatedStringNode.
Definition ast.h:826
@ PM_FALSE_NODE
FalseNode.
Definition ast.h:724
@ PM_HASH_NODE
HashNode.
Definition ast.h:766
@ PM_MATCH_PREDICATE_NODE
MatchPredicateNode.
Definition ast.h:871
@ PM_X_STRING_NODE
XStringNode.
Definition ast.h:1021
@ PM_GLOBAL_VARIABLE_TARGET_NODE
GlobalVariableTargetNode.
Definition ast.h:760
@ PM_AND_NODE
AndNode.
Definition ast.h:583
@ PM_CONSTANT_TARGET_NODE
ConstantTargetNode.
Definition ast.h:700
@ PM_IT_LOCAL_VARIABLE_READ_NODE
ItLocalVariableReadNode.
Definition ast.h:835
@ PM_SOURCE_FILE_NODE
SourceFileNode.
Definition ast.h:982
@ PM_NO_KEYWORDS_PARAMETER_NODE
NoKeywordsParameterNode.
Definition ast.h:898
@ PM_MULTI_TARGET_NODE
MultiTargetNode.
Definition ast.h:886
@ PM_SPLAT_NODE
SplatNode.
Definition ast.h:988
@ PM_CLASS_VARIABLE_READ_NODE
ClassVariableReadNode.
Definition ast.h:661
@ PM_ELSE_NODE
ElseNode.
Definition ast.h:712
@ PM_INTERPOLATED_MATCH_LAST_LINE_NODE
InterpolatedMatchLastLineNode.
Definition ast.h:820
@ PM_SYMBOL_NODE
SymbolNode.
Definition ast.h:1000
@ PM_RESCUE_MODIFIER_NODE
RescueModifierNode.
Definition ast.h:955
@ PM_ALIAS_METHOD_NODE
AliasMethodNode.
Definition ast.h:577
@ PM_MATCH_REQUIRED_NODE
MatchRequiredNode.
Definition ast.h:874
@ PM_BACK_REFERENCE_READ_NODE
BackReferenceReadNode.
Definition ast.h:601
@ PM_BLOCK_ARGUMENT_NODE
BlockArgumentNode.
Definition ast.h:607
@ PM_MISSING_NODE
MissingNode.
Definition ast.h:880
@ PM_SELF_NODE
SelfNode.
Definition ast.h:970
@ PM_TRUE_NODE
TrueNode.
Definition ast.h:1003
@ PM_ASSOC_SPLAT_NODE
AssocSplatNode.
Definition ast.h:598
@ PM_RANGE_NODE
RangeNode.
Definition ast.h:937
@ PM_LOCAL_VARIABLE_READ_NODE
LocalVariableReadNode.
Definition ast.h:859
@ PM_NEXT_NODE
NextNode.
Definition ast.h:892
@ PM_REGULAR_EXPRESSION_NODE
RegularExpressionNode.
Definition ast.h:946
@ PM_CONSTANT_WRITE_NODE
ConstantWriteNode.
Definition ast.h:703
@ PM_HASH_PATTERN_NODE
HashPatternNode.
Definition ast.h:769
@ PM_UNDEF_NODE
UndefNode.
Definition ast.h:1006
@ PM_ENSURE_NODE
EnsureNode.
Definition ast.h:721
@ PM_LOCAL_VARIABLE_WRITE_NODE
LocalVariableWriteNode.
Definition ast.h:865
@ PM_KEYWORD_HASH_NODE
KeywordHashNode.
Definition ast.h:841
@ PM_PARENTHESES_NODE
ParenthesesNode.
Definition ast.h:919
@ PM_CLASS_VARIABLE_WRITE_NODE
ClassVariableWriteNode.
Definition ast.h:667
@ PM_POST_EXECUTION_NODE
PostExecutionNode.
Definition ast.h:928
@ PM_RETURN_NODE
ReturnNode.
Definition ast.h:967
@ PM_ARRAY_PATTERN_NODE
ArrayPatternNode.
Definition ast.h:592
@ PM_MATCH_LAST_LINE_NODE
MatchLastLineNode.
Definition ast.h:868
@ PM_CONSTANT_PATH_NODE
ConstantPathNode.
Definition ast.h:682
@ PM_INTERPOLATED_SYMBOL_NODE
InterpolatedSymbolNode.
Definition ast.h:829
@ PM_CLASS_VARIABLE_TARGET_NODE
ClassVariableTargetNode.
Definition ast.h:664
@ PM_BREAK_NODE
BreakNode.
Definition ast.h:622
@ PM_IMAGINARY_NODE
ImaginaryNode.
Definition ast.h:775
@ PM_CONSTANT_READ_NODE
ConstantReadNode.
Definition ast.h:697
@ PM_GLOBAL_VARIABLE_WRITE_NODE
GlobalVariableWriteNode.
Definition ast.h:763
@ PM_SOURCE_ENCODING_NODE
SourceEncodingNode.
Definition ast.h:979
@ PM_BEGIN_NODE
BeginNode.
Definition ast.h:604
@ PM_INSTANCE_VARIABLE_READ_NODE
InstanceVariableReadNode.
Definition ast.h:808
@ PM_FLIP_FLOP_NODE
FlipFlopNode.
Definition ast.h:730
@ PM_INSTANCE_VARIABLE_WRITE_NODE
InstanceVariableWriteNode.
Definition ast.h:814
@ PM_INSTANCE_VARIABLE_TARGET_NODE
InstanceVariableTargetNode.
Definition ast.h:811
@ PM_CASE_NODE
CaseNode.
Definition ast.h:646
@ PM_FLOAT_NODE
FloatNode.
Definition ast.h:733
@ PM_ASSOC_NODE
AssocNode.
Definition ast.h:595
@ PM_INTEGER_NODE
IntegerNode.
Definition ast.h:817
@ PM_LOCAL_VARIABLE_TARGET_NODE
LocalVariableTargetNode.
Definition ast.h:862
@ PM_STRING_NODE
StringNode.
Definition ast.h:994
@ PM_ALIAS_GLOBAL_VARIABLE_NODE
AliasGlobalVariableNode.
Definition ast.h:574
@ PM_NUMBERED_REFERENCE_READ_NODE
NumberedReferenceReadNode.
Definition ast.h:904
@ PM_STATEMENTS_NODE
StatementsNode.
Definition ast.h:991
@ PM_BLOCK_NODE
BlockNode.
Definition ast.h:613
@ PM_INTERPOLATED_REGULAR_EXPRESSION_NODE
InterpolatedRegularExpressionNode.
Definition ast.h:823
struct pm_begin_node pm_begin_node_t
BeginNode.
struct pm_statements_node pm_statements_node_t
StatementsNode.
struct pm_instance_variable_write_node pm_instance_variable_write_node_t
InstanceVariableWriteNode.
struct pm_keyword_hash_node pm_keyword_hash_node_t
KeywordHashNode.
static const pm_node_flags_t PM_NODE_FLAG_NEWLINE
We store the flags enum in every node in the tree.
Definition ast.h:1046
@ PM_SYMBOL_FLAGS_FORCED_US_ASCII_ENCODING
internal bytes forced the encoding to US-ASCII
Definition ast.h:7937
struct pm_constant_path_node pm_constant_path_node_t
ConstantPathNode.
struct pm_local_variable_write_node pm_local_variable_write_node_t
LocalVariableWriteNode.
@ PM_STRING_FLAGS_FROZEN
frozen by virtue of a frozen_string_literal: true comment or --enable-frozen-string-literal
Definition ast.h:7920
@ PM_STRING_FLAGS_FORCED_BINARY_ENCODING
internal bytes forced the encoding to binary
Definition ast.h:7917
@ PM_STRING_FLAGS_FORCED_UTF8_ENCODING
internal bytes forced the encoding to UTF-8
Definition ast.h:7914
@ PM_ARGUMENTS_NODE_FLAGS_CONTAINS_FORWARDING
if the arguments contain forwarding
Definition ast.h:7746
@ PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS
if the arguments contain keywords
Definition ast.h:7749
@ PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORD_SPLAT
if the arguments contain a keyword splat
Definition ast.h:7752
struct pm_parameters_node pm_parameters_node_t
ParametersNode.
#define PM_NODE_FLAG_P(node, flag)
Return true if the given flag is set on the given node.
Definition ast.h:1063
struct pm_case_node pm_case_node_t
CaseNode.
struct pm_if_node pm_if_node_t
IfNode.
struct pm_rescue_modifier_node pm_rescue_modifier_node_t
RescueModifierNode.
struct pm_splat_node pm_splat_node_t
SplatNode.
struct pm_match_write_node pm_match_write_node_t
MatchWriteNode.
struct pm_multi_write_node pm_multi_write_node_t
MultiWriteNode.
struct pm_interpolated_x_string_node pm_interpolated_x_string_node_t
InterpolatedXStringNode.
struct pm_constant_write_node pm_constant_write_node_t
ConstantWriteNode.
struct pm_flip_flop_node pm_flip_flop_node_t
FlipFlopNode.
#define PM_NODE_TYPE_P(node, type)
Return true if the type of the given node matches the given type.
Definition ast.h:1058
#define PM_NODE_TYPE(node)
Cast the type to an enum to allow the compiler to provide exhaustiveness checking.
Definition ast.h:1053
struct pm_global_variable_read_node pm_global_variable_read_node_t
GlobalVariableReadNode.
struct pm_match_last_line_node pm_match_last_line_node_t
MatchLastLineNode.
struct pm_hash_node pm_hash_node_t
HashNode.
struct pm_block_local_variable_node pm_block_local_variable_node_t
BlockLocalVariableNode.
struct pm_multi_target_node pm_multi_target_node_t
MultiTargetNode.
@ PM_INTEGER_BASE_FLAGS_HEXADECIMAL
0x prefix
Definition ast.h:7811
@ PM_INTEGER_BASE_FLAGS_OCTAL
0o or 0 prefix
Definition ast.h:7808
@ PM_INTEGER_BASE_FLAGS_DECIMAL
0d or no prefix
Definition ast.h:7805
@ PM_INTEGER_BASE_FLAGS_BINARY
0b prefix
Definition ast.h:7802
struct pm_rational_node pm_rational_node_t
RationalNode.
struct pm_ensure_node pm_ensure_node_t
EnsureNode.
struct pm_forwarding_parameter_node pm_forwarding_parameter_node_t
ForwardingParameterNode.
struct pm_when_node pm_when_node_t
WhenNode.
enum pm_token_type pm_token_type_t
This enum represents every type of token in the Ruby source.
struct pm_range_node pm_range_node_t
RangeNode.
struct pm_and_node pm_and_node_t
AndNode.
#define PRISM_SERIALIZE_ONLY_SEMANTICS_FIELDS
When we're serializing to Java, we want to skip serializing the location fields as they won't be used...
Definition ast.h:7946
@ PM_CALL_NODE_FLAGS_SAFE_NAVIGATION
&.
Definition ast.h:7774
@ PM_CALL_NODE_FLAGS_ATTRIBUTE_WRITE
a call that is an attribute write, so the value being written should be returned
Definition ast.h:7780
@ PM_CALL_NODE_FLAGS_VARIABLE_CALL
a call that could have been a local variable
Definition ast.h:7777
struct pm_constant_read_node pm_constant_read_node_t
ConstantReadNode.
struct pm_or_node pm_or_node_t
OrNode.
struct pm_case_match_node pm_case_match_node_t
CaseMatchNode.
struct pm_imaginary_node pm_imaginary_node_t
ImaginaryNode.
struct pm_array_pattern_node pm_array_pattern_node_t
ArrayPatternNode.
struct pm_integer_node pm_integer_node_t
IntegerNode.
struct pm_constant_path_target_node pm_constant_path_target_node_t
ConstantPathTargetNode.
struct pm_global_variable_target_node pm_global_variable_target_node_t
GlobalVariableTargetNode.
struct pm_node_list pm_node_list_t
A list of nodes in the source, most often used for lists of children.
struct pm_required_parameter_node pm_required_parameter_node_t
RequiredParameterNode.
struct pm_symbol_node pm_symbol_node_t
SymbolNode.
struct pm_block_parameters_node pm_block_parameters_node_t
BlockParametersNode.
struct pm_parentheses_node pm_parentheses_node_t
ParenthesesNode.
@ PM_REGULAR_EXPRESSION_FLAGS_FORCED_US_ASCII_ENCODING
internal bytes forced the encoding to US-ASCII
Definition ast.h:7892
@ PM_REGULAR_EXPRESSION_FLAGS_EXTENDED
x - ignores whitespace and allows comments in regular expressions
Definition ast.h:7865
struct pm_instance_variable_read_node pm_instance_variable_read_node_t
InstanceVariableReadNode.
struct pm_constant_target_node pm_constant_target_node_t
ConstantTargetNode.
struct pm_node pm_node_t
This is the base structure that represents a node in the syntax tree.
struct pm_interpolated_symbol_node pm_interpolated_symbol_node_t
InterpolatedSymbolNode.
struct pm_class_variable_target_node pm_class_variable_target_node_t
ClassVariableTargetNode.
uint16_t pm_node_flags_t
These are the flags embedded in the node struct.
Definition ast.h:1040
struct pm_regular_expression_node pm_regular_expression_node_t
RegularExpressionNode.
@ PM_TOKEN_STAR_STAR
**
Definition ast.h:469
@ PM_TOKEN_DOT_DOT_DOT
the ... range operator or forwarding parameter
Definition ast.h:124
@ PM_TOKEN_MINUS_EQUAL
-=
Definition ast.h:385
@ PM_TOKEN_IGNORED_NEWLINE
an ignored newline
Definition ast.h:196
@ PM_TOKEN_BANG_EQUAL
!=
Definition ast.h:64
@ PM_TOKEN_KEYWORD___FILE__
__FILE__
Definition ast.h:349
@ PM_TOKEN_KEYWORD_WHEN
when
Definition ast.h:334
@ PM_TOKEN_FLOAT
a floating point number
Definition ast.h:160
@ PM_TOKEN_PLUS_EQUAL
+=
Definition ast.h:442
@ PM_TOKEN_DOT_DOT
the .. range operator
Definition ast.h:121
@ PM_TOKEN_UDOT_DOT
unary .. operator
Definition ast.h:496
@ PM_TOKEN_AMPERSAND_DOT
&.
Definition ast.h:49
@ PM_TOKEN_NEWLINE
a newline character outside of other tokens
Definition ast.h:391
@ PM_TOKEN_NUMBERED_REFERENCE
a numbered reference to a capture group in the previous regular expression match
Definition ast.h:394
@ PM_TOKEN_AMPERSAND
&
Definition ast.h:40
@ PM_TOKEN_KEYWORD_YIELD
yield
Definition ast.h:343
@ PM_TOKEN_KEYWORD_END
end
Definition ast.h:253
@ PM_TOKEN_LAMBDA_BEGIN
{
Definition ast.h:361
@ PM_TOKEN_KEYWORD_UNTIL_MODIFIER
until in the modifier form
Definition ast.h:331
@ PM_TOKEN_EQUAL_EQUAL_EQUAL
===
Definition ast.h:151
@ PM_TOKEN_INTEGER_RATIONAL
an integer with a rational suffix
Definition ast.h:208
@ PM_TOKEN_USTAR
unary *
Definition ast.h:511
@ PM_TOKEN_TILDE
~ or ~@
Definition ast.h:487
@ PM_TOKEN_KEYWORD___ENCODING__
__ENCODING__
Definition ast.h:346
@ PM_TOKEN_REGEXP_END
the end of a regular expression
Definition ast.h:451
@ PM_TOKEN_KEYWORD_UNTIL
until
Definition ast.h:328
@ PM_TOKEN_COMMA
,
Definition ast.h:109
@ PM_TOKEN_MAXIMUM
The maximum token value.
Definition ast.h:523
@ PM_TOKEN_GREATER
>
Definition ast.h:175
@ PM_TOKEN_INTEGER
an integer (any base)
Definition ast.h:202
@ PM_TOKEN_SLASH_EQUAL
/=
Definition ast.h:460
@ PM_TOKEN_UMINUS_NUM
-@ for a number
Definition ast.h:505
@ PM_TOKEN_EMBVAR
Definition ast.h:142
@ PM_TOKEN_KEYWORD_UNLESS_MODIFIER
unless in the modifier form
Definition ast.h:325
@ PM_TOKEN_INTEGER_RATIONAL_IMAGINARY
an integer with a rational and imaginary suffix
Definition ast.h:211
@ PM_TOKEN_FLOAT_RATIONAL_IMAGINARY
a floating pointer number with a rational and imaginary suffix
Definition ast.h:169
@ PM_TOKEN_BRACKET_LEFT_RIGHT
[]
Definition ast.h:82
@ PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL
&&=
Definition ast.h:46
@ PM_TOKEN_KEYWORD_CLASS
class
Definition ast.h:232
@ PM_TOKEN_KEYWORD_BEGIN
begin
Definition ast.h:220
@ PM_TOKEN_NOT_PROVIDED
a token that was not present but it is okay
Definition ast.h:37
@ PM_TOKEN_USTAR_STAR
unary **
Definition ast.h:514
@ PM_TOKEN_GREATER_GREATER_EQUAL
Definition ast.h:184
@ PM_TOKEN_PERCENT_EQUAL
%=
Definition ast.h:409
@ PM_TOKEN_PERCENT
%
Definition ast.h:406
@ PM_TOKEN_KEYWORD_IN
in
Definition ast.h:274
@ PM_TOKEN_BANG
!
Definition ast.h:61
@ PM_TOKEN_KEYWORD_NOT
not
Definition ast.h:286
@ PM_TOKEN_BRACKET_LEFT_ARRAY
[ for the beginning of an array
Definition ast.h:79
@ PM_TOKEN_HEREDOC_END
the end of a heredoc
Definition ast.h:187
@ PM_TOKEN_HEREDOC_START
the start of a heredoc
Definition ast.h:190
@ PM_TOKEN_KEYWORD_DEFINED
defined?
Definition ast.h:238
@ PM_TOKEN_UCOLON_COLON
unary ::
Definition ast.h:493
@ PM_TOKEN_LABEL_END
the end of a label
Definition ast.h:358
@ PM_TOKEN_EQUAL_GREATER
=>
Definition ast.h:154
@ PM_TOKEN_KEYWORD_UNLESS
unless
Definition ast.h:322
@ PM_TOKEN_KEYWORD_ENSURE
ensure
Definition ast.h:259
@ PM_TOKEN_AMPERSAND_EQUAL
&=
Definition ast.h:52
@ PM_TOKEN_EQUAL_EQUAL
==
Definition ast.h:148
@ PM_TOKEN_UPLUS
+@
Definition ast.h:508
@ PM_TOKEN_FLOAT_IMAGINARY
a floating pointer number with an imaginary suffix
Definition ast.h:163
@ PM_TOKEN_KEYWORD_BEGIN_UPCASE
BEGIN.
Definition ast.h:223
@ PM_TOKEN_LESS_EQUAL_GREATER
<=>
Definition ast.h:370
@ PM_TOKEN_KEYWORD_RESCUE_MODIFIER
rescue in the modifier form
Definition ast.h:298
@ PM_TOKEN_MISSING
a token that was expected but not found
Definition ast.h:34
@ PM_TOKEN_MINUS_GREATER
->
Definition ast.h:388
@ PM_TOKEN_KEYWORD_FALSE
false
Definition ast.h:262
@ PM_TOKEN_PIPE_PIPE_EQUAL
||=
Definition ast.h:436
@ PM_TOKEN_KEYWORD_IF
if
Definition ast.h:268
@ PM_TOKEN_EMBEXPR_BEGIN
#{
Definition ast.h:136
@ PM_TOKEN_PARENTHESIS_LEFT_PARENTHESES
( for a parentheses node
Definition ast.h:400
@ PM_TOKEN_EMBDOC_END
=end
Definition ast.h:130
@ PM_TOKEN_KEYWORD_ELSE
else
Definition ast.h:247
@ PM_TOKEN_BACK_REFERENCE
a back reference
Definition ast.h:58
@ PM_TOKEN_BRACKET_LEFT
[
Definition ast.h:76
@ PM_TOKEN_EOF
final token in the file
Definition ast.h:31
@ PM_TOKEN_PIPE_PIPE
||
Definition ast.h:433
@ PM_TOKEN_KEYWORD_NIL
nil
Definition ast.h:283
@ PM_TOKEN_PERCENT_UPPER_W
%W
Definition ast.h:424
@ PM_TOKEN_KEYWORD_RETURN
return
Definition ast.h:304
@ PM_TOKEN_CLASS_VARIABLE
a class variable
Definition ast.h:100
@ PM_TOKEN_PIPE
|
Definition ast.h:427
@ PM_TOKEN_PARENTHESIS_LEFT
(
Definition ast.h:397
@ PM_TOKEN_BANG_TILDE
!~
Definition ast.h:67
@ PM_TOKEN_DOT
the . call operator
Definition ast.h:118
@ PM_TOKEN_PARENTHESIS_RIGHT
)
Definition ast.h:403
@ PM_TOKEN_KEYWORD_RESCUE
rescue
Definition ast.h:295
@ PM_TOKEN_INSTANCE_VARIABLE
an instance variable
Definition ast.h:199
@ PM_TOKEN_PIPE_EQUAL
|=
Definition ast.h:430
@ PM_TOKEN_BRACKET_LEFT_RIGHT_EQUAL
[]=
Definition ast.h:85
@ PM_TOKEN_UAMPERSAND
unary &
Definition ast.h:490
@ PM_TOKEN_MINUS
-
Definition ast.h:382
@ PM_TOKEN_CONSTANT
a constant
Definition ast.h:115
@ PM_TOKEN_IDENTIFIER
an identifier
Definition ast.h:193
@ PM_TOKEN_EMBDOC_BEGIN
=begin
Definition ast.h:127
@ PM_TOKEN_STAR_EQUAL
*=
Definition ast.h:466
@ PM_TOKEN_KEYWORD_OR
or
Definition ast.h:289
@ PM_TOKEN_KEYWORD_AND
and
Definition ast.h:217
@ PM_TOKEN_LESS
<
Definition ast.h:364
@ PM_TOKEN_KEYWORD_BREAK
break
Definition ast.h:226
@ PM_TOKEN_PERCENT_LOWER_W
%w
Definition ast.h:415
@ PM_TOKEN_SYMBOL_BEGIN
the beginning of a symbol
Definition ast.h:484
@ PM_TOKEN_METHOD_NAME
a method name
Definition ast.h:379
@ PM_TOKEN_KEYWORD_CASE
case
Definition ast.h:229
@ PM_TOKEN_WORDS_SEP
a separator between words in a list
Definition ast.h:517
@ PM_TOKEN_FLOAT_RATIONAL
a floating pointer number with a rational suffix
Definition ast.h:166
@ PM_TOKEN_LESS_LESS_EQUAL
<<=
Definition ast.h:376
@ PM_TOKEN_EMBDOC_LINE
a line inside of embedded documentation
Definition ast.h:133
@ PM_TOKEN_KEYWORD_SUPER
super
Definition ast.h:310
@ PM_TOKEN_KEYWORD_DO
do
Definition ast.h:241
@ PM_TOKEN_KEYWORD_REDO
redo
Definition ast.h:292
@ PM_TOKEN_EQUAL_TILDE
=~
Definition ast.h:157
@ PM_TOKEN_EMBEXPR_END
}
Definition ast.h:139
@ PM_TOKEN_KEYWORD_END_UPCASE
END.
Definition ast.h:256
@ PM_TOKEN_KEYWORD___LINE__
__LINE__
Definition ast.h:352
@ PM_TOKEN_STRING_END
the end of a string
Definition ast.h:481
@ PM_TOKEN_STRING_CONTENT
the contents of a string
Definition ast.h:478
@ PM_TOKEN_BRACE_LEFT
{
Definition ast.h:70
@ PM_TOKEN_COLON_COLON
::
Definition ast.h:106
@ PM_TOKEN_GREATER_GREATER
Definition ast.h:181
@ PM_TOKEN_PERCENT_LOWER_X
%x
Definition ast.h:418
@ PM_TOKEN_KEYWORD_SELF
self
Definition ast.h:307
@ PM_TOKEN_PERCENT_LOWER_I
%i
Definition ast.h:412
@ PM_TOKEN_KEYWORD_ALIAS
alias
Definition ast.h:214
@ PM_TOKEN_GLOBAL_VARIABLE
a global variable
Definition ast.h:172
@ PM_TOKEN_KEYWORD_IF_MODIFIER
if in the modifier form
Definition ast.h:271
@ PM_TOKEN_SLASH
/
Definition ast.h:457
@ PM_TOKEN_KEYWORD_RETRY
retry
Definition ast.h:301
@ PM_TOKEN_COLON
:
Definition ast.h:103
@ PM_TOKEN_KEYWORD_UNDEF
undef
Definition ast.h:319
@ PM_TOKEN_BRACKET_RIGHT
]
Definition ast.h:88
@ PM_TOKEN_KEYWORD_FOR
for
Definition ast.h:265
@ PM_TOKEN_KEYWORD_THEN
then
Definition ast.h:313
@ PM_TOKEN_QUESTION_MARK
?
Definition ast.h:445
@ PM_TOKEN___END__
marker for the point in the file at which the parser should stop
Definition ast.h:520
@ PM_TOKEN_KEYWORD_WHILE
while
Definition ast.h:337
@ PM_TOKEN_EQUAL
=
Definition ast.h:145
@ PM_TOKEN_KEYWORD_DEF
def
Definition ast.h:235
@ PM_TOKEN_UDOT_DOT_DOT
unary ... operator
Definition ast.h:499
@ PM_TOKEN_STAR
*
Definition ast.h:463
@ PM_TOKEN_KEYWORD_WHILE_MODIFIER
while in the modifier form
Definition ast.h:340
@ PM_TOKEN_KEYWORD_TRUE
true
Definition ast.h:316
@ PM_TOKEN_BRACE_RIGHT
}
Definition ast.h:73
@ PM_TOKEN_SEMICOLON
;
Definition ast.h:454
@ PM_TOKEN_REGEXP_BEGIN
the beginning of a regular expression
Definition ast.h:448
@ PM_TOKEN_CARET
^
Definition ast.h:91
@ PM_TOKEN_PERCENT_UPPER_I
%I
Definition ast.h:421
@ PM_TOKEN_KEYWORD_DO_LOOP
do keyword for a predicate in a while, until, or for loop
Definition ast.h:244
@ PM_TOKEN_KEYWORD_MODULE
module
Definition ast.h:277
@ PM_TOKEN_PLUS
+
Definition ast.h:439
@ PM_TOKEN_KEYWORD_NEXT
next
Definition ast.h:280
@ PM_TOKEN_BACKTICK
`
Definition ast.h:55
@ PM_TOKEN_INTEGER_IMAGINARY
an integer with an imaginary suffix
Definition ast.h:205
@ PM_TOKEN_LABEL
a label
Definition ast.h:355
@ PM_TOKEN_STAR_STAR_EQUAL
**=
Definition ast.h:472
@ PM_TOKEN_CHARACTER_LITERAL
a character literal
Definition ast.h:97
@ PM_TOKEN_AMPERSAND_AMPERSAND
&&
Definition ast.h:43
@ PM_TOKEN_UMINUS
-@
Definition ast.h:502
@ PM_TOKEN_LESS_LESS
<<
Definition ast.h:373
@ PM_TOKEN_GREATER_EQUAL
>=
Definition ast.h:178
@ PM_TOKEN_COMMENT
a comment
Definition ast.h:112
@ PM_TOKEN_CARET_EQUAL
^=
Definition ast.h:94
@ PM_TOKEN_KEYWORD_ELSIF
elsif
Definition ast.h:250
@ PM_TOKEN_STRING_BEGIN
the beginning of a string
Definition ast.h:475
@ PM_TOKEN_LESS_EQUAL
<=
Definition ast.h:367
struct pm_rescue_node pm_rescue_node_t
RescueNode.
struct pm_array_node pm_array_node_t
ArrayNode.
struct pm_global_variable_write_node pm_global_variable_write_node_t
GlobalVariableWriteNode.
@ PM_ENCODING_FLAGS_FORCED_UTF8_ENCODING
internal bytes forced the encoding to UTF-8
Definition ast.h:7791
struct pm_interpolated_match_last_line_node pm_interpolated_match_last_line_node_t
InterpolatedMatchLastLineNode.
struct pm_unless_node pm_unless_node_t
UnlessNode.
struct pm_interpolated_regular_expression_node pm_interpolated_regular_expression_node_t
InterpolatedRegularExpressionNode.
struct pm_instance_variable_target_node pm_instance_variable_target_node_t
InstanceVariableTargetNode.
struct pm_string_node pm_string_node_t
StringNode.
struct pm_float_node pm_float_node_t
FloatNode.
@ PM_LOOP_FLAGS_BEGIN_MODIFIER
a loop after a begin statement, so the body is executed first before the condition
Definition ast.h:7838
struct pm_find_pattern_node pm_find_pattern_node_t
FindPatternNode.
pm_diagnostic_id_t
The diagnostic IDs of all of the diagnostics, used to communicate the types of errors between the par...
Definition diagnostic.h:29
#define xfree
Old name of ruby_xfree.
Definition xmalloc.h:58
#define xmalloc
Old name of ruby_xmalloc.
Definition xmalloc.h:53
#define xcalloc
Old name of ruby_xcalloc.
Definition xmalloc.h:55
VALUE type(ANYARGS)
ANYARGS-ed function type.
struct pm_options_scope pm_options_scope_t
A scope of locals surrounding the code that is being parsed.
struct pm_options pm_options_t
The options that can be passed to the parser.
static const uint8_t PM_OPTIONS_COMMAND_LINE_N
A bit representing whether or not the command line -n option was set.
Definition options.h:185
#define PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED
String literals should be made mutable.
Definition options.h:20
#define PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED
String literals should be made frozen.
Definition options.h:31
#define PM_OPTIONS_FROZEN_STRING_LITERAL_UNSET
String literals may be frozen or mutable depending on the implementation default.
Definition options.h:26
static const uint8_t PM_OPTIONS_COMMAND_LINE_P
A bit representing whether or not the command line -p option was set.
Definition options.h:191
@ PM_OPTIONS_VERSION_CRUBY_3_3
The vendored version of prism in CRuby 3.3.x.
Definition options.h:71
struct pm_locals pm_locals_t
This is a set of local variables in a certain lexical context (method, class, module,...
pm_heredoc_indent_t
The type of indentation that a heredoc uses.
Definition parser.h:79
struct pm_context_node pm_context_node_t
This is a node in a linked list of contexts.
#define PM_LEX_STACK_SIZE
We pre-allocate a certain number of lex states in order to avoid having to call malloc too many times...
Definition parser.h:262
struct pm_parser pm_parser_t
The parser used to parse Ruby source.
Definition parser.h:267
struct pm_lex_mode pm_lex_mode_t
When lexing Ruby source, the lexer has a small amount of state to tell which kind of token it is curr...
struct pm_comment pm_comment_t
This is a node in the linked list of comments that we've found while parsing.
pm_lex_state_t
This enum combines the various bits from the above enum into individual values that represent the var...
Definition parser.h:46
struct pm_scope pm_scope_t
This struct represents a node in a linked list of scopes.
pm_heredoc_quote_t
The type of quote that a heredoc uses.
Definition parser.h:69
void(* pm_encoding_changed_callback_t)(pm_parser_t *parser)
When the encoding that is being used to parse the source is changed by prism, we provide the ability ...
Definition parser.h:496
pm_context_t
While parsing, we keep track of a stack of contexts.
Definition parser.h:274
@ PM_CONTEXT_CLASS_RESCUE
a rescue statement within a class statement
Definition parser.h:321
@ PM_CONTEXT_ELSIF
an elsif clause
Definition parser.h:348
@ PM_CONTEXT_DEF_RESCUE
a rescue statement within a method definition
Definition parser.h:333
@ PM_CONTEXT_ELSE
an else clause
Definition parser.h:345
@ PM_CONTEXT_FOR_INDEX
a for loop's index
Definition parser.h:357
@ PM_CONTEXT_CASE_WHEN
a case when statement
Definition parser.h:306
@ PM_CONTEXT_BLOCK_RESCUE
a rescue statement within a do..end block
Definition parser.h:303
@ PM_CONTEXT_MODULE
a module declaration
Definition parser.h:384
@ PM_CONTEXT_DEF_PARAMS
a method definition's parameters
Definition parser.h:336
@ PM_CONTEXT_CASE_IN
a case in statement
Definition parser.h:309
@ PM_CONTEXT_BLOCK_ELSE
a rescue else statement within a do..end block
Definition parser.h:300
@ PM_CONTEXT_LOOP_PREDICATE
the predicate clause of a loop statement
Definition parser.h:378
@ PM_CONTEXT_SCLASS
a singleton class definition
Definition parser.h:414
@ PM_CONTEXT_UNLESS
an unless statement
Definition parser.h:429
@ PM_CONTEXT_POSTEXE
an END block
Definition parser.h:402
@ PM_CONTEXT_IF
an if statement
Definition parser.h:360
@ PM_CONTEXT_MULTI_TARGET
a multiple target expression
Definition parser.h:396
@ PM_CONTEXT_LAMBDA_RESCUE
a rescue statement within a lambda expression
Definition parser.h:375
@ PM_CONTEXT_BEGIN_ELSE
a rescue else statement with an explicit begin
Definition parser.h:285
@ PM_CONTEXT_NONE
a null context, used for returning a value from a function
Definition parser.h:276
@ PM_CONTEXT_CLASS_ELSE
a rescue else statement within a class statement
Definition parser.h:318
@ PM_CONTEXT_LAMBDA_ENSURE
an ensure statement within a lambda expression
Definition parser.h:369
@ PM_CONTEXT_BLOCK_ENSURE
an ensure statement within a do..end block
Definition parser.h:297
@ PM_CONTEXT_CLASS_ENSURE
an ensure statement within a class statement
Definition parser.h:315
@ PM_CONTEXT_LAMBDA_BRACES
a lambda expression with braces
Definition parser.h:363
@ PM_CONTEXT_MODULE_ELSE
a rescue else statement within a module statement
Definition parser.h:390
@ PM_CONTEXT_PARENS
a parenthesized expression
Definition parser.h:399
@ PM_CONTEXT_BLOCK_BRACES
expressions in block arguments using braces
Definition parser.h:291
@ PM_CONTEXT_DEF_ENSURE
an ensure statement within a method definition
Definition parser.h:327
@ PM_CONTEXT_SCLASS_RESCUE
a rescue statement with a singleton class
Definition parser.h:423
@ PM_CONTEXT_PREEXE
a BEGIN block
Definition parser.h:408
@ PM_CONTEXT_DEFINED
a defined? expression
Definition parser.h:339
@ PM_CONTEXT_MODULE_ENSURE
an ensure statement within a module statement
Definition parser.h:387
@ PM_CONTEXT_BEGIN_RESCUE
a rescue statement with an explicit begin
Definition parser.h:288
@ PM_CONTEXT_UNTIL
an until statement
Definition parser.h:432
@ PM_CONTEXT_DEF_ELSE
a rescue else statement within a method definition
Definition parser.h:330
@ PM_CONTEXT_FOR
a for loop
Definition parser.h:354
@ PM_CONTEXT_PREDICATE
a predicate inside an if/elsif/unless statement
Definition parser.h:405
@ PM_CONTEXT_BEGIN_ENSURE
an ensure statement with an explicit begin
Definition parser.h:282
@ PM_CONTEXT_SCLASS_ENSURE
an ensure statement with a singleton class
Definition parser.h:417
@ PM_CONTEXT_DEFAULT_PARAMS
a method definition's default parameter
Definition parser.h:342
@ PM_CONTEXT_LAMBDA_ELSE
a rescue else statement within a lambda expression
Definition parser.h:372
@ PM_CONTEXT_CLASS
a class declaration
Definition parser.h:312
@ PM_CONTEXT_MAIN
the top level context
Definition parser.h:381
@ PM_CONTEXT_LAMBDA_DO_END
a lambda expression with do..end
Definition parser.h:366
@ PM_CONTEXT_BEGIN
a begin statement
Definition parser.h:279
@ PM_CONTEXT_RESCUE_MODIFIER
a modifier rescue clause
Definition parser.h:411
@ PM_CONTEXT_EMBEXPR
an interpolated expression
Definition parser.h:351
@ PM_CONTEXT_TERNARY
a ternary expression
Definition parser.h:426
@ PM_CONTEXT_DEF
a method definition
Definition parser.h:324
@ PM_CONTEXT_SCLASS_ELSE
a rescue else statement with a singleton class
Definition parser.h:420
@ PM_CONTEXT_MODULE_RESCUE
a rescue statement within a module statement
Definition parser.h:393
@ PM_CONTEXT_BLOCK_KEYWORDS
expressions in block arguments using do..end
Definition parser.h:294
@ PM_CONTEXT_WHILE
a while statement
Definition parser.h:435
uint8_t pm_shareable_constant_value_t
The type of shareable constant value that can be set.
Definition parser.h:522
pm_comment_type_t
This is the type of a comment that we've found while parsing.
Definition parser.h:448
#define PM_CONSTANT_ID_UNSET
When we allocate constants into the pool, we reserve 0 to mean that the slot is not yet filled.
uint32_t pm_constant_id_t
A constant id is a unique identifier for a constant in the constant pool.
struct pm_list_node pm_list_node_t
This struct represents an abstract linked list that provides common functionality.
#define PM_STRING_EMPTY
Defines an empty string.
Definition pm_string.h:70
#define PRISM_UNLIKELY(x)
The compiler should predict that this branch will not be taken.
Definition defines.h:234
#define PRISM_ATTRIBUTE_UNUSED
GCC will warn if you specify a function or parameter that is unused at runtime.
Definition defines.h:78
#define PRISM_DEPTH_MAXIMUM
When we are parsing using recursive descent, we want to protect against malicious payloads that could...
Definition defines.h:34
#define PM_STATIC_ASSERT(line, condition, message)
We want to be able to use static assertions, but they weren't standardized until C11.
Definition defines.h:113
#define PRISM_EXPORTED_FUNCTION
By default, we compile with -fvisibility=hidden.
Definition defines.h:50
#define PM_ENCODING_US_ASCII_ENTRY
This is the US-ASCII encoding.
Definition encoding.h:252
#define PM_ENCODING_UTF_8_ENTRY
This is the default UTF-8 encoding.
Definition encoding.h:245
#define PRISM_ENCODING_ALPHABETIC_BIT
All of the lookup tables use the first bit of each embedded byte to indicate whether the codepoint is...
Definition encoding.h:68
#define PRISM_ENCODING_ALPHANUMERIC_BIT
All of the lookup tables use the second bit of each embedded byte to indicate whether the codepoint i...
Definition encoding.h:74
#define PM_NODE_LIST_FOREACH(list, index, node)
Loop through each node in the node list, writing each node to the given pm_node_t pointer.
Definition node.h:17
#define PRISM_VERSION
The version of the Prism library as a constant string.
Definition version.h:27
#define PRISM_VERSION_PATCH
The patch version of the Prism library as an int.
Definition version.h:22
#define PRISM_VERSION_MINOR
The minor version of the Prism library as an int.
Definition version.h:17
#define PRISM_VERSION_MAJOR
The major version of the Prism library as an int.
Definition version.h:12
The main header file for the prism parser.
pm_string_query_t
Represents the results of a slice query.
Definition prism.h:240
@ PM_STRING_QUERY_TRUE
Returned if the result of the slice query is true.
Definition prism.h:248
@ PM_STRING_QUERY_ERROR
Returned if the encoding given to a slice query was invalid.
Definition prism.h:242
@ PM_STRING_QUERY_FALSE
Returned if the result of the slice query is false.
Definition prism.h:245
void pm_serialize_content(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer)
Serialize the encoding, metadata, nodes, and constant pool.
Definition serialize.c:2133
void pm_serialize_encoding(const pm_encoding_t *encoding, pm_buffer_t *buffer)
Serialize the name of the encoding to the buffer.
Definition serialize.c:2110
char * pm_parse_stream_fgets_t(char *string, int size, void *stream)
This function is used in pm_parse_stream to retrieve a line of input from a stream.
Definition prism.h:88
void pm_serialize_comment_list(pm_parser_t *parser, pm_list_t *list, pm_buffer_t *buffer)
Serialize the given list of comments to the given buffer.
Definition serialize.c:2040
const char * pm_token_type_human(pm_token_type_t token_type)
Returns the human name of the given token type.
Definition token_type.c:362
This struct is used to pass information between the regular expression parser and the error callback.
Definition prism.c:17973
pm_parser_t * parser
The parser that we are parsing the regular expression for.
Definition prism.c:17975
const uint8_t * start
The start of the regular expression.
Definition prism.c:17978
bool shared
Whether or not the source of the regular expression is shared.
Definition prism.c:17989
const uint8_t * end
The end of the regular expression.
Definition prism.c:17981
This struct is used to pass information between the regular expression parser and the named capture c...
Definition prism.c:20831
pm_constant_id_list_t names
The list of names that have been parsed.
Definition prism.c:20842
pm_parser_t * parser
The parser that is parsing the regular expression.
Definition prism.c:20833
pm_match_write_node_t * match
The match write node that is being created.
Definition prism.c:20839
pm_call_node_t * call
The call node wrapping the regular expression node.
Definition prism.c:20836
bool shared
Whether the content of the regular expression is shared.
Definition prism.c:20849
struct pm_node * left
AndNode#left.
Definition ast.h:1279
struct pm_node * right
AndNode#right.
Definition ast.h:1292
pm_node_t base
The embedded base node.
Definition ast.h:1326
struct pm_node_list arguments
ArgumentsNode#arguments.
Definition ast.h:1337
This is a special out parameter to the parse_arguments_list function that includes opening and closin...
Definition prism.c:1575
pm_node_t * block
The optional block attached to the call.
Definition prism.c:1586
bool has_forwarding
The flag indicating whether this arguments list has a forwarding argument.
Definition prism.c:1589
pm_location_t opening_loc
The optional location of the opening parenthesis or bracket.
Definition prism.c:1577
pm_arguments_node_t * arguments
The lazily-allocated optional arguments node.
Definition prism.c:1580
pm_location_t closing_loc
The optional location of the closing parenthesis or bracket.
Definition prism.c:1583
struct pm_node_list elements
ArrayNode#elements.
Definition ast.h:1365
struct pm_node * constant
ArrayPatternNode#constant.
Definition ast.h:1424
pm_location_t opening_loc
ArrayPatternNode#opening_loc.
Definition ast.h:1464
pm_node_t base
The embedded base node.
Definition ast.h:1418
pm_location_t closing_loc
ArrayPatternNode#closing_loc.
Definition ast.h:1474
struct pm_node * value
AssocNode#value.
Definition ast.h:1521
struct pm_node * key
AssocNode#key.
Definition ast.h:1508
struct pm_ensure_node * ensure_clause
BeginNode#ensure_clause.
Definition ast.h:1668
struct pm_rescue_node * rescue_clause
BeginNode#rescue_clause.
Definition ast.h:1648
struct pm_statements_node * statements
BeginNode#statements.
Definition ast.h:1638
pm_node_t base
The embedded base node.
Definition ast.h:1617
struct pm_else_node * else_clause
BeginNode#else_clause.
Definition ast.h:1658
This struct represents a set of binding powers used for a given token.
Definition prism.c:12943
bool binary
Whether or not this token can be used as a binary operator.
Definition prism.c:12951
pm_binding_power_t left
The left binding power.
Definition prism.c:12945
bool nonassoc
Whether or not this token can be used as a non-associative binary operator.
Definition prism.c:12957
pm_binding_power_t right
The right binding power.
Definition prism.c:12948
A pm_buffer_t is a simple memory buffer that stores data in a contiguous block of memory.
Definition pm_buffer.h:22
size_t length
The length of the buffer in bytes.
Definition pm_buffer.h:24
char * value
A pointer to the start of the buffer.
Definition pm_buffer.h:30
pm_location_t opening_loc
CallNode#opening_loc.
Definition ast.h:2180
pm_location_t closing_loc
CallNode#closing_loc.
Definition ast.h:2200
struct pm_node * receiver
CallNode#receiver.
Definition ast.h:2138
pm_constant_id_t name
CallNode::name.
Definition ast.h:2161
pm_node_t base
The embedded base node.
Definition ast.h:2121
pm_location_t call_operator_loc
CallNode#call_operator_loc.
Definition ast.h:2151
pm_location_t message_loc
CallNode#message_loc.
Definition ast.h:2171
struct pm_arguments_node * arguments
CallNode#arguments.
Definition ast.h:2190
struct pm_node * block
CallNode#block.
Definition ast.h:2210
struct pm_node_list conditions
CaseMatchNode#conditions.
Definition ast.h:2568
struct pm_node_list conditions
CaseNode#conditions.
Definition ast.h:2638
pm_location_t location
The location of the comment in the source.
Definition parser.h:463
A list of constant IDs.
A constant in the pool which effectively stores a string.
size_t length
The length of the string.
const uint8_t * start
A pointer to the start of the string.
pm_context_t context
The context that this node represents.
Definition parser.h:441
struct pm_context_node * prev
A pointer to the previous context in the linked list.
Definition parser.h:444
This struct represents a diagnostic generated during parsing.
Definition diagnostic.h:359
struct pm_statements_node * statements
ElseNode#statements.
Definition ast.h:3623
This struct defines the functions necessary to implement the encoding interface so we can determine h...
Definition encoding.h:23
size_t(* alpha_char)(const uint8_t *b, ptrdiff_t n)
Return the number of bytes that the next character takes if it is valid in the encoding and is alphab...
Definition encoding.h:36
size_t(* char_width)(const uint8_t *b, ptrdiff_t n)
Return the number of bytes that the next character takes if it is valid in the encoding.
Definition encoding.h:29
bool(* isupper_char)(const uint8_t *b, ptrdiff_t n)
Return true if the next character is valid in the encoding and is an uppercase character.
Definition encoding.h:50
const char * name
The name of the encoding.
Definition encoding.h:56
size_t(* alnum_char)(const uint8_t *b, ptrdiff_t n)
Return the number of bytes that the next character takes if it is valid in the encoding and is alphan...
Definition encoding.h:43
struct pm_statements_node * statements
EnsureNode#statements.
Definition ast.h:3721
struct pm_node * constant
FindPatternNode#constant.
Definition ast.h:3773
pm_location_t opening_loc
FindPatternNode#opening_loc.
Definition ast.h:3793
pm_node_t base
The embedded base node.
Definition ast.h:3767
pm_location_t closing_loc
FindPatternNode#closing_loc.
Definition ast.h:3798
double value
FloatNode#value.
Definition ast.h:3859
pm_node_t base
The embedded base node.
Definition ast.h:3851
struct pm_node_list elements
HashNode#elements.
Definition ast.h:4285
pm_location_t opening_loc
HashPatternNode#opening_loc.
Definition ast.h:4336
pm_node_t base
The embedded base node.
Definition ast.h:4315
pm_location_t closing_loc
HashPatternNode#closing_loc.
Definition ast.h:4341
struct pm_node * constant
HashPatternNode#constant.
Definition ast.h:4321
All of the information necessary to lex a heredoc.
Definition parser.h:88
size_t ident_length
The length of the heredoc identifier.
Definition parser.h:93
pm_heredoc_quote_t quote
The type of quote that the heredoc uses.
Definition parser.h:96
pm_heredoc_indent_t indent
The type of indentation that the heredoc uses.
Definition parser.h:99
const uint8_t * ident_start
A pointer to the start of the heredoc identifier.
Definition parser.h:90
struct pm_statements_node * statements
IfNode#statements.
Definition ast.h:4422
struct pm_node * subsequent
IfNode#subsequent.
Definition ast.h:4441
pm_integer_t value
IntegerNode#value.
Definition ast.h:5088
pm_node_t base
The embedded base node.
Definition ast.h:5080
bool negative
Whether or not the integer is negative.
Definition pm_integer.h:42
pm_node_t base
The embedded base node.
Definition ast.h:5201
pm_location_t opening_loc
InterpolatedStringNode#opening_loc.
Definition ast.h:5207
pm_node_t base
The embedded base node.
Definition ast.h:5234
pm_location_t opening_loc
InterpolatedXStringNode#opening_loc.
Definition ast.h:5273
pm_node_t base
The embedded base node.
Definition ast.h:5267
struct pm_node_list parts
InterpolatedXStringNode#parts.
Definition ast.h:5278
void(* callback)(void *data, pm_parser_t *parser, pm_token_t *token)
This is the callback that is called when a token is lexed.
Definition parser.h:518
void * data
This opaque pointer is used to provide whatever information the user deemed necessary to the callback...
Definition parser.h:512
uint8_t terminator
This is the terminator of the list literal.
Definition parser.h:165
size_t nesting
This keeps track of the nesting level of the list.
Definition parser.h:153
bool interpolation
Whether or not interpolation is allowed in this list.
Definition parser.h:156
uint8_t incrementor
When lexing a list, it takes into account balancing the terminator if the terminator is one of (),...
Definition parser.h:162
uint8_t breakpoints[11]
This is the character set that should be used to delimit the tokens within the list.
Definition parser.h:171
pm_heredoc_lex_mode_t base
All of the data necessary to lex a heredoc.
Definition parser.h:233
bool line_continuation
True if the previous token ended with a line continuation.
Definition parser.h:249
struct pm_lex_mode * prev
The previous lex state so that it knows how to pop.
Definition parser.h:254
union pm_lex_mode::@303336126360075302344075121136356113360170030306 as
The data associated with this type of lex mode.
bool label_allowed
Whether or not at the end of the string we should allow a :, which would indicate this was a dynamic ...
Definition parser.h:208
const uint8_t * next_start
This is the pointer to the character where lexing should resume once the heredoc has been completely ...
Definition parser.h:239
size_t * common_whitespace
This is used to track the amount of common whitespace on each line so that we know how much to dedent...
Definition parser.h:246
enum pm_lex_mode::@204051102252353332352362146052355003264223055126 mode
The type of this lex mode.
struct pm_list_node * next
A pointer to the next node in the list.
Definition pm_list.h:48
This represents the overall linked list.
Definition pm_list.h:55
pm_list_node_t * head
A pointer to the head of the list.
Definition pm_list.h:60
size_t size
The size of the list.
Definition pm_list.h:57
This tracks an individual local variable in a certain lexical context, as well as the number of times...
Definition parser.h:532
pm_constant_id_t name
The name of the local variable.
Definition parser.h:534
pm_location_t location
The location of the local variable in the source.
Definition parser.h:537
uint32_t hash
The hash of the local variable.
Definition parser.h:546
uint32_t index
The index of the local variable in the local table.
Definition parser.h:540
uint32_t reads
The number of times the local variable is read.
Definition parser.h:543
uint32_t depth
LocalVariableReadNode#depth.
Definition ast.h:5610
pm_constant_id_t name
LocalVariableReadNode#name.
Definition ast.h:5597
uint32_t depth
LocalVariableWriteNode#depth.
Definition ast.h:5680
pm_constant_id_t name
LocalVariableWriteNode#name.
Definition ast.h:5667
pm_local_t * locals
The nullable allocated memory for the local variables in the set.
Definition parser.h:562
uint32_t capacity
The capacity of the local variables set.
Definition parser.h:559
uint32_t size
The number of local variables in the set.
Definition parser.h:556
This represents a range of bytes in the source string to which a node or token corresponds.
Definition ast.h:545
const uint8_t * start
A pointer to the start location of the range in the source.
Definition ast.h:547
const uint8_t * end
A pointer to the end location of the range in the source.
Definition ast.h:550
This is a node in the linked list of magic comments that we've found while parsing.
Definition parser.h:475
struct pm_node_list targets
MatchWriteNode#targets.
Definition ast.h:5862
pm_node_t base
The embedded base node.
Definition ast.h:5947
pm_location_t lparen_loc
MultiTargetNode#lparen_loc.
Definition ast.h:6003
struct pm_node_list lefts
MultiTargetNode#lefts.
Definition ast.h:5963
pm_location_t rparen_loc
MultiTargetNode#rparen_loc.
Definition ast.h:6013
size_t * offsets
The list of offsets.
size_t size
The number of offsets in the list.
size_t size
The number of nodes in the list.
Definition ast.h:560
struct pm_node ** nodes
The nodes in the list.
Definition ast.h:566
pm_node_type_t type
This represents the type of the node.
Definition ast.h:1074
pm_node_flags_t flags
This represents any flags on the node.
Definition ast.h:1080
pm_location_t location
This is the location of the node in the source.
Definition ast.h:1092
size_t locals_count
The number of locals in the scope.
Definition options.h:38
uint8_t command_line
A bitset of the various options that were set on the command line.
Definition options.h:126
void * shebang_callback_data
Any additional data that should be passed along to the shebang callback if one was set.
Definition options.h:88
bool encoding_locked
Whether or not the encoding magic comments should be respected.
Definition options.h:142
bool main_script
When the file being parsed is the main script, the shebang will be considered for command-line flags ...
Definition options.h:149
pm_string_t encoding
The name of the encoding that the source file is in.
Definition options.h:103
int32_t line
The line within the file that the parse starts on.
Definition options.h:97
pm_options_shebang_callback_t shebang_callback
The callback to call when additional switches are found in a shebang comment.
Definition options.h:82
int8_t frozen_string_literal
Whether or not the frozen string literal option has been set.
Definition options.h:135
bool partial_script
When the file being parsed is considered a "partial" script, jumps will not be marked as errors if th...
Definition options.h:159
size_t scopes_count
The number of scopes surrounding the code that is being parsed.
Definition options.h:108
pm_string_t filepath
The name of the file that is currently being parsed.
Definition options.h:91
pm_options_version_t version
The version of prism that we should be parsing with.
Definition options.h:123
struct pm_node * left
OrNode#left.
Definition ast.h:6355
struct pm_node * right
OrNode#right.
Definition ast.h:6368
struct pm_node * rest
ParametersNode#rest.
Definition ast.h:6412
struct pm_block_parameter_node * block
ParametersNode#block.
Definition ast.h:6432
pm_node_t base
The embedded base node.
Definition ast.h:6396
struct pm_node * keyword_rest
ParametersNode#keyword_rest.
Definition ast.h:6427
struct pm_node * body
ParenthesesNode#body.
Definition ast.h:6455
const pm_encoding_t * explicit_encoding
When a string-like expression is being lexed, any byte or escape sequence that resolves to a value wh...
Definition parser.h:840
pm_lex_state_t lex_state
The current state of the lexer.
Definition parser.h:649
uint8_t command_line
The command line flags given from the options.
Definition parser.h:859
const pm_encoding_t * encoding
The encoding functions for the current file are attached to the parser as it's parsing so that it can ...
Definition parser.h:755
bool partial_script
Whether or not we are parsing a "partial" script, which is a script that will be evaluated in the con...
Definition parser.h:882
bool pattern_matching_newlines
This flag indicates that we are currently parsing a pattern matching expression and impacts the calc...
Definition parser.h:909
const uint8_t * end
The pointer to the end of the source.
Definition parser.h:694
bool recovering
Whether or not we're currently recovering from a syntax error.
Definition parser.h:888
pm_node_flags_t integer_base
We want to add a flag to integer nodes that indicates their base.
Definition parser.h:797
bool warn_mismatched_indentation
By default, Ruby always warns about mismatched indentation.
Definition parser.h:930
pm_constant_pool_t constant_pool
This constant pool keeps all of the constants defined throughout the file so that we can reference th...
Definition parser.h:786
bool in_keyword_arg
This flag indicates that we are currently parsing a keyword argument.
Definition parser.h:912
const uint8_t * next_start
This is a special field set on the parser when we need the parser to jump to a specific location when...
Definition parser.h:707
pm_static_literals_t * current_hash_keys
The hash keys for the hash that is currently being parsed.
Definition parser.h:749
pm_list_t magic_comment_list
The list of magic comments that have been found while parsing.
Definition parser.h:721
int lambda_enclosure_nesting
Used to temporarily track the nesting of enclosures to determine if a { is the beginning of a lambda ...
Definition parser.h:658
pm_lex_callback_t * lex_callback
This is an optional callback that can be attached to the parser that will be called whenever a new to...
Definition parser.h:774
pm_options_version_t version
The version of prism that we should use to parse.
Definition parser.h:856
pm_token_t previous
The previous token we were considering.
Definition parser.h:697
pm_string_t current_string
This string is used to pass information from the lexer to the parser.
Definition parser.h:803
bool parsing_eval
Whether or not we are parsing an eval string.
Definition parser.h:875
bool current_regular_expression_ascii_only
True if the current regular expression being lexed contains only ASCII characters.
Definition parser.h:924
bool encoding_changed
Whether or not the encoding has been changed by a magic comment.
Definition parser.h:903
pm_location_t data_loc
An optional location that represents the location of the END marker and the rest of the content of th...
Definition parser.h:728
pm_context_node_t * current_context
The current parsing context.
Definition parser.h:740
const uint8_t * start
The pointer to the start of the source.
Definition parser.h:691
int enclosure_nesting
Tracks the current nesting of (), [], and {}.
Definition parser.h:652
pm_list_t error_list
The list of errors that have been found while parsing.
Definition parser.h:734
int8_t frozen_string_literal
Whether or not we have found a frozen_string_literal magic comment with a true or false value.
Definition parser.h:869
pm_node_list_t * current_block_exits
When parsing block exits (e.g., break, next, redo), we need to validate that they are in correct cont...
Definition parser.h:853
const uint8_t * encoding_comment_start
This pointer indicates where a comment must start if it is to be considered an encoding comment.
Definition parser.h:768
pm_lex_mode_t stack[PM_LEX_STACK_SIZE]
The stack of lexer modes.
Definition parser.h:684
pm_list_t warning_list
The list of warnings that have been found while parsing.
Definition parser.h:731
const uint8_t * heredoc_end
This field indicates the end of a heredoc whose identifier was found on the current line.
Definition parser.h:715
int brace_nesting
Used to track the nesting of braces to ensure we get the correct value when we are interpolating bloc...
Definition parser.h:664
pm_encoding_changed_callback_t encoding_changed_callback
When the encoding that is being used to parse the source is changed by prism, we provide the ability ...
Definition parser.h:762
int32_t start_line
The line number at the start of the parse.
Definition parser.h:809
bool encoding_locked
This is very specialized behavior for when you want to parse in a context that does not respect encod...
Definition parser.h:896
pm_lex_mode_t * current
The current mode of the lexer.
Definition parser.h:681
struct pm_parser::@236040131255244317313236162207277265316171136011 lex_modes
A stack of lex modes.
pm_list_t comment_list
The list of comments that have been found while parsing.
Definition parser.h:718
size_t index
The current index into the lexer mode stack.
Definition parser.h:687
pm_string_t filepath
This is the path of the file being parsed.
Definition parser.h:780
pm_scope_t * current_scope
The current local scope.
Definition parser.h:737
bool command_start
Whether or not we're at the beginning of a command.
Definition parser.h:885
pm_newline_list_t newline_list
This is the list of newline offsets in the source file.
Definition parser.h:789
bool semantic_token_seen
Whether or not the parser has seen a token that has semantic meaning (i.e., a token that is not a com...
Definition parser.h:918
struct pm_node * right
RangeNode#right.
Definition ast.h:6683
struct pm_node * left
RangeNode#left.
Definition ast.h:6669
pm_node_t base
The embedded base node.
Definition ast.h:6713
pm_integer_t numerator
RationalNode#numerator.
Definition ast.h:6723
In order to properly set a regular expression's encoding and to validate the byte sequence for the underlying encoding we must process any escape sequences.
Definition prism.c:10369
pm_buffer_t regexp_buffer
The buffer holding the regexp source.
Definition prism.c:10374
pm_token_buffer_t base
The embedded base buffer.
Definition prism.c:10371
pm_node_t base
The embedded base node.
Definition ast.h:6780
pm_string_t unescaped
RegularExpressionNode#unescaped.
Definition ast.h:6801
struct pm_node * rescue_expression
RescueModifierNode#rescue_expression.
Definition ast.h:6893
struct pm_rescue_node * subsequent
RescueNode#subsequent.
Definition ast.h:6946
pm_node_t base
The embedded base node.
Definition ast.h:6915
struct pm_scope * previous
A pointer to the previous scope in the linked list.
Definition parser.h:582
pm_node_list_t implicit_parameters
This is a list of the implicit parameters contained within the block.
Definition parser.h:593
pm_shareable_constant_value_t shareable_constant
The current state of constant shareability for this scope.
Definition parser.h:620
pm_locals_t locals
The IDs of the locals in the given scope.
Definition parser.h:585
pm_scope_parameters_t parameters
This is a bitfield that indicates the parameters that are being used in this scope.
Definition parser.h:614
bool closed
A boolean indicating whether or not this scope can see into its parent.
Definition parser.h:626
struct pm_node * expression
SplatNode#expression.
Definition ast.h:7221
struct pm_node_list body
StatementsNode#body.
Definition ast.h:7244
Certain sets of nodes (hash keys and when clauses) check for duplicate nodes to alert the user of potential issues.
pm_node_t base
The embedded base node.
Definition ast.h:7273
pm_string_t unescaped
StringNode#unescaped.
Definition ast.h:7294
pm_location_t closing_loc
StringNode#closing_loc.
Definition ast.h:7289
pm_location_t opening_loc
StringNode#opening_loc.
Definition ast.h:7279
A generic string type that can have various ownership semantics.
Definition pm_string.h:33
const uint8_t * source
A pointer to the start of the string.
Definition pm_string.h:35
size_t length
The length of the string in bytes of memory.
Definition pm_string.h:38
enum pm_string_t::(anonymous enum) type
The type of the string.
pm_location_t value_loc
SymbolNode#value_loc.
Definition ast.h:7376
pm_string_t unescaped
SymbolNode#unescaped.
Definition ast.h:7386
When we're lexing certain types (strings, symbols, lists, etc.) we have string content associated with the tokens.
Definition prism.c:10343
pm_buffer_t buffer
The buffer that we're using to keep track of the string content.
Definition prism.c:10348
const uint8_t * cursor
The cursor into the source string that points to how far we have currently copied into the buffer.
Definition prism.c:10354
This struct represents a token in the Ruby source.
Definition ast.h:530
const uint8_t * end
A pointer to the end location of the token in the source.
Definition ast.h:538
const uint8_t * start
A pointer to the start location of the token in the source.
Definition ast.h:535
pm_token_type_t type
The type of the token.
Definition ast.h:532
struct pm_statements_node * statements
UnlessNode#statements.
Definition ast.h:7500
struct pm_else_node * else_clause
UnlessNode#else_clause.
Definition ast.h:7510