33OnigCaseFoldType OnigDefaultCaseFoldFlag = ONIGENC_CASE_FOLD_MIN;
35extern OnigCaseFoldType
36onig_get_default_case_fold_flag(
void)
38 return OnigDefaultCaseFoldFlag;
42onig_set_default_case_fold_flag(OnigCaseFoldType case_fold_flag)
44 OnigDefaultCaseFoldFlag = case_fold_flag;
49#ifndef PLATFORM_UNALIGNED_WORD_ACCESS
50static unsigned char PadBuf[WORD_ALIGNMENT_SIZE];
55str_dup(UChar* s, UChar* end)
57 ptrdiff_t
len = end - s;
74 c = *a; *a = *b; *b = c;
76 if (NTYPE(a) == NT_STR) {
79 size_t len = sn->end - sn->s;
81 sn->end = sn->s +
len;
85 if (NTYPE(b) == NT_STR) {
88 size_t len = sn->end - sn->s;
90 sn->end = sn->s +
len;
96distance_add(OnigDistance d1, OnigDistance d2)
98 if (d1 == ONIG_INFINITE_DISTANCE || d2 == ONIG_INFINITE_DISTANCE)
99 return ONIG_INFINITE_DISTANCE;
101 if (d1 <= ONIG_INFINITE_DISTANCE - d2)
return d1 + d2;
102 else return ONIG_INFINITE_DISTANCE;
107distance_multiply(OnigDistance d,
int m)
109 if (m == 0)
return 0;
111 if (d < ONIG_INFINITE_DISTANCE / m)
114 return ONIG_INFINITE_DISTANCE;
118bitset_is_empty(BitSetRef bs)
121 for (i = 0; i < BITSET_SIZE; i++) {
122 if (bs[i] != 0)
return 0;
129bitset_on_num(BitSetRef bs)
134 for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
135 if (BITSET_AT(bs, i)) n++;
151 else if (reg->alloc > reg->used) {
152 unsigned char *new_ptr =
xrealloc(reg->p, reg->used);
155 reg->alloc = reg->used;
159 }
while ((reg = reg->chain) != 0);
163onig_bbuf_init(
BBuf* buf, OnigDistance size)
170 buf->p = (UChar* )
xmalloc(size);
171 if (IS_NULL(buf->p))
return(ONIGERR_MEMORY);
174 buf->alloc = (
unsigned int )size;
180#ifdef USE_SUBEXP_CALL
188 CHECK_NULL_RETURN_MEMERR(p);
190 uslist->alloc = size;
207 if (uslist->num >= uslist->alloc) {
208 size = uslist->alloc * 2;
210 CHECK_NULL_RETURN_MEMERR(p);
211 uslist->alloc = size;
215 uslist->us[uslist->num].offset = offset;
216 uslist->us[uslist->num].target = node;
224add_opcode(
regex_t* reg,
int opcode)
226 BBUF_ADD1(reg, opcode);
230#ifdef USE_COMBINATION_EXPLOSION_CHECK
232add_state_check_num(
regex_t* reg,
int num)
234 StateCheckNumType n = (StateCheckNumType )num;
236 BBUF_ADD(reg, &n, SIZE_STATE_CHECK_NUM);
242add_rel_addr(
regex_t* reg,
int addr)
244 RelAddrType ra = (RelAddrType )addr;
246 BBUF_ADD(reg, &ra, SIZE_RELADDR);
251add_abs_addr(
regex_t* reg,
int addr)
253 AbsAddrType ra = (AbsAddrType )addr;
255 BBUF_ADD(reg, &ra, SIZE_ABSADDR);
262 LengthType l = (LengthType )
len;
264 BBUF_ADD(reg, &l, SIZE_LENGTH);
269add_mem_num(
regex_t* reg,
int num)
271 MemNumType n = (MemNumType )num;
273 BBUF_ADD(reg, &n, SIZE_MEMNUM);
279add_pointer(
regex_t* reg,
void* addr)
281 PointerType ptr = (PointerType )addr;
283 BBUF_ADD(reg, &ptr, SIZE_POINTER);
289add_option(
regex_t* reg, OnigOptionType option)
291 BBUF_ADD(reg, &option, SIZE_OPTION);
296add_opcode_rel_addr(
regex_t* reg,
int opcode,
int addr)
300 r = add_opcode(reg, opcode);
302 r = add_rel_addr(reg, addr);
307add_bytes(
regex_t* reg, UChar* bytes, OnigDistance
len)
309 BBUF_ADD(reg, bytes,
len);
314add_bitset(
regex_t* reg, BitSetRef bs)
316 BBUF_ADD(reg, bs, SIZE_BITSET);
321add_opcode_option(
regex_t* reg,
int opcode, OnigOptionType option)
325 r = add_opcode(reg, opcode);
327 r = add_option(reg, option);
331static int compile_length_tree(
Node* node,
regex_t* reg);
335#define IS_NEED_STR_LEN_OP_EXACT(op) \
336 ((op) == OP_EXACTN || (op) == OP_EXACTMB2N ||\
337 (op) == OP_EXACTMB3N || (op) == OP_EXACTMBN || (op) == OP_EXACTN_IC)
340select_str_opcode(
int mb_len, OnigDistance byte_len,
int ignore_case)
343 OnigDistance str_len = roomof(byte_len, mb_len);
347 case 1: op = OP_EXACT1_IC;
break;
348 default: op = OP_EXACTN_IC;
break;
355 case 1: op = OP_EXACT1;
break;
356 case 2: op = OP_EXACT2;
break;
357 case 3: op = OP_EXACT3;
break;
358 case 4: op = OP_EXACT4;
break;
359 case 5: op = OP_EXACT5;
break;
360 default: op = OP_EXACTN;
break;
366 case 1: op = OP_EXACTMB2N1;
break;
367 case 2: op = OP_EXACTMB2N2;
break;
368 case 3: op = OP_EXACTMB2N3;
break;
369 default: op = OP_EXACTMB2N;
break;
386compile_tree_empty_check(
Node* node,
regex_t* reg,
int empty_info)
389 int saved_num_null_check = reg->num_null_check;
391 if (empty_info != 0) {
392 r = add_opcode(reg, OP_NULL_CHECK_START);
394 r = add_mem_num(reg, reg->num_null_check);
396 reg->num_null_check++;
397 if ((MemNumType)reg->num_null_check <= 0)
return ONIGERR_TOO_MANY_NULL_CHECK;
400 r = compile_tree(node, reg);
403 if (empty_info != 0) {
404 if (empty_info == NQ_TARGET_IS_EMPTY)
405 r = add_opcode(reg, OP_NULL_CHECK_END);
406 else if (empty_info == NQ_TARGET_IS_EMPTY_MEM)
407 r = add_opcode(reg, OP_NULL_CHECK_END_MEMST);
408 else if (empty_info == NQ_TARGET_IS_EMPTY_REC)
409 r = add_opcode(reg, OP_NULL_CHECK_END_MEMST_PUSH);
412 r = add_mem_num(reg, saved_num_null_check);
417#ifdef USE_SUBEXP_CALL
423 r = add_opcode(reg, OP_CALL);
425 r = unset_addr_list_add(node->unset_addr_list, BBUF_GET_OFFSET_POS(reg),
428 r = add_abs_addr(reg, 0 );
434compile_tree_n_times(
Node* node,
int n,
regex_t* reg)
438 for (i = 0; i < n; i++) {
439 r = compile_tree(node, reg);
446add_compile_string_length(UChar* s ARG_UNUSED,
int mb_len, OnigDistance byte_len,
447 regex_t* reg ARG_UNUSED,
int ignore_case)
450 int op = select_str_opcode(mb_len, byte_len, ignore_case);
454 if (op == OP_EXACTMBN)
len += SIZE_LENGTH;
455 if (IS_NEED_STR_LEN_OP_EXACT(op))
458 len += (int )byte_len;
463add_compile_string(UChar* s,
int mb_len, OnigDistance byte_len,
466 int op = select_str_opcode(mb_len, byte_len, ignore_case);
469 if (op == OP_EXACTMBN)
470 add_length(reg, mb_len);
472 if (IS_NEED_STR_LEN_OP_EXACT(op)) {
473 if (op == OP_EXACTN_IC)
474 add_length(reg, byte_len);
476 add_length(reg, byte_len / mb_len);
479 add_bytes(reg, s, byte_len);
485compile_length_string_node(
Node* node,
regex_t* reg)
487 int rlen, r,
len, prev_len, blen, ambig;
493 if (sn->end <= sn->s)
496 ambig = NSTRING_IS_AMBIG(node);
499 prev_len = enclen(enc, p, sn->end);
504 for (; p < sn->end; ) {
505 len = enclen(enc, p, sn->end);
506 if (
len == prev_len || ambig) {
510 r = add_compile_string_length(prev, prev_len, blen, reg, ambig);
518 r = add_compile_string_length(prev, prev_len, blen, reg, ambig);
526 if (sn->end <= sn->s)
529 return add_compile_string_length(sn->s, 1 , sn->end - sn->s, reg, 0);
535 int r,
len, prev_len, blen, ambig;
537 UChar *p, *prev, *end;
541 if (sn->end <= sn->s)
545 ambig = NSTRING_IS_AMBIG(node);
548 prev_len = enclen(enc, p, end);
553 len = enclen(enc, p, end);
554 if (
len == prev_len || ambig) {
558 r = add_compile_string(prev, prev_len, blen, reg, ambig);
568 return add_compile_string(prev, prev_len, blen, reg, ambig);
574 if (sn->end <= sn->s)
577 return add_compile_string(sn->s, 1 , sn->end - sn->s, reg, 0);
583#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
584 add_length(reg, mbuf->used);
585 return add_bytes(reg, mbuf->p, mbuf->used);
588 UChar* p = BBUF_GET_ADD_ADDRESS(reg) + SIZE_LENGTH;
590 GET_ALIGNMENT_PAD_SIZE(p, pad_size);
591 add_length(reg, mbuf->used + (WORD_ALIGNMENT_SIZE - 1));
592 if (pad_size != 0) add_bytes(reg, PadBuf, pad_size);
594 r = add_bytes(reg, mbuf->p, mbuf->used);
597 pad_size = (WORD_ALIGNMENT_SIZE - 1) - pad_size;
598 if (pad_size != 0) add_bytes(reg, PadBuf, pad_size);
608 if (IS_NULL(cc->mbuf)) {
609 len = SIZE_OPCODE + SIZE_BITSET;
612 if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) {
616 len = SIZE_OPCODE + SIZE_BITSET;
618#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
619 len += SIZE_LENGTH + cc->mbuf->used;
621 len += SIZE_LENGTH + cc->mbuf->used + (WORD_ALIGNMENT_SIZE - 1);
633 if (IS_NULL(cc->mbuf)) {
634 if (IS_NCCLASS_NOT(cc))
635 add_opcode(reg, OP_CCLASS_NOT);
637 add_opcode(reg, OP_CCLASS);
639 r = add_bitset(reg, cc->bs);
642 if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) {
643 if (IS_NCCLASS_NOT(cc))
644 add_opcode(reg, OP_CCLASS_MB_NOT);
646 add_opcode(reg, OP_CCLASS_MB);
648 r = add_multi_byte_cclass(cc->mbuf, reg);
651 if (IS_NCCLASS_NOT(cc))
652 add_opcode(reg, OP_CCLASS_MIX_NOT);
654 add_opcode(reg, OP_CCLASS_MIX);
656 r = add_bitset(reg, cc->bs);
658 r = add_multi_byte_cclass(cc->mbuf, reg);
666entry_repeat_range(
regex_t* reg,
int id,
int lower,
int upper)
668#define REPEAT_RANGE_ALLOC 4
672 if (reg->repeat_range_alloc == 0) {
674 CHECK_NULL_RETURN_MEMERR(p);
675 reg->repeat_range = p;
676 reg->repeat_range_alloc = REPEAT_RANGE_ALLOC;
678 else if (reg->repeat_range_alloc <=
id) {
680 n = reg->repeat_range_alloc + REPEAT_RANGE_ALLOC;
683 CHECK_NULL_RETURN_MEMERR(p);
684 reg->repeat_range = p;
685 reg->repeat_range_alloc = n;
688 p = reg->repeat_range;
692 p[id].upper = (IS_REPEAT_INFINITE(upper) ? 0x7fffffff : upper);
697compile_range_repeat_node(
QtfrNode* qn,
int target_len,
int empty_info,
701 int num_repeat = reg->num_repeat;
703 r = add_opcode(reg, qn->greedy ? OP_REPEAT : OP_REPEAT_NG);
705 r = add_mem_num(reg, num_repeat);
707 if ((MemNumType)reg->num_repeat <= 0)
return ONIGERR_TOO_MANY_RANGE_REPEAT;
709 r = add_rel_addr(reg, target_len + SIZE_OP_REPEAT_INC);
712 r = entry_repeat_range(reg, num_repeat, qn->lower, qn->upper);
715 r = compile_tree_empty_check(qn->target, reg, empty_info);
719#ifdef USE_SUBEXP_CALL
722 IS_QUANTIFIER_IN_REPEAT(qn)) {
723 r = add_opcode(reg, qn->greedy ? OP_REPEAT_INC_SG : OP_REPEAT_INC_NG_SG);
726 r = add_opcode(reg, qn->greedy ? OP_REPEAT_INC : OP_REPEAT_INC_NG);
729 r = add_mem_num(reg, num_repeat);
734is_anychar_star_quantifier(
QtfrNode* qn)
736 if (qn->greedy && IS_REPEAT_INFINITE(qn->upper) &&
737 NTYPE(qn->target) == NT_CANY)
743#define QUANTIFIER_EXPAND_LIMIT_SIZE 50
744#define CKN_ON (ckn > 0)
746#ifdef USE_COMBINATION_EXPLOSION_CHECK
751 int len, mod_tlen, cklen;
753 int infinite = IS_REPEAT_INFINITE(qn->upper);
754 int empty_info = qn->target_empty_info;
755 int tlen = compile_length_tree(qn->target, reg);
757 if (tlen < 0)
return tlen;
759 ckn = ((reg->num_comb_exp_check > 0) ? qn->comb_exp_check_num : 0);
761 cklen = (CKN_ON ? SIZE_STATE_CHECK_NUM: 0);
764 if (NTYPE(qn->target) == NT_CANY) {
765 if (qn->greedy && infinite) {
766 if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON)
767 return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower + cklen;
769 return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower + cklen;
774 mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
778 if (infinite && qn->lower <= 1) {
785 len += SIZE_OP_PUSH + cklen + mod_tlen + SIZE_OP_JUMP;
793 len += mod_tlen + SIZE_OP_PUSH + cklen;
796 else if (qn->upper == 0) {
797 if (qn->is_referred != 0)
798 len = SIZE_OP_JUMP + tlen;
802 else if (qn->upper == 1 && qn->greedy) {
803 if (qn->lower == 0) {
805 len = SIZE_OP_STATE_CHECK_PUSH + tlen;
808 len = SIZE_OP_PUSH + tlen;
815 else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) {
816 len = SIZE_OP_PUSH + cklen + SIZE_OP_JUMP + tlen;
819 len = SIZE_OP_REPEAT_INC
820 + mod_tlen + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM;
822 len += SIZE_OP_STATE_CHECK;
833 int infinite = IS_REPEAT_INFINITE(qn->upper);
834 int empty_info = qn->target_empty_info;
835 int tlen = compile_length_tree(qn->target, reg);
837 if (tlen < 0)
return tlen;
839 ckn = ((reg->num_comb_exp_check > 0) ? qn->comb_exp_check_num : 0);
841 if (is_anychar_star_quantifier(qn)) {
842 r = compile_tree_n_times(qn->target, qn->lower, reg);
844 if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON) {
845 if (IS_MULTILINE(reg->options))
846 r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT);
848 r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT);
851 r = add_state_check_num(reg, ckn);
855 return add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
858 if (IS_MULTILINE(reg->options)) {
859 r = add_opcode(reg, (CKN_ON ?
860 OP_STATE_CHECK_ANYCHAR_ML_STAR
861 : OP_ANYCHAR_ML_STAR));
864 r = add_opcode(reg, (CKN_ON ?
865 OP_STATE_CHECK_ANYCHAR_STAR
870 r = add_state_check_num(reg, ckn);
877 mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
881 if (infinite && qn->lower <= 1) {
883 if (qn->lower == 1) {
884 r = add_opcode_rel_addr(reg, OP_JUMP,
885 (CKN_ON ? SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH));
890 r = add_opcode(reg, OP_STATE_CHECK_PUSH);
892 r = add_state_check_num(reg, ckn);
894 r = add_rel_addr(reg, mod_tlen + SIZE_OP_JUMP);
897 r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP);
900 r = compile_tree_empty_check(qn->target, reg, empty_info);
902 r = add_opcode_rel_addr(reg, OP_JUMP,
903 -(mod_tlen + (
int )SIZE_OP_JUMP
904 + (
int )(CKN_ON ? SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH)));
907 if (qn->lower == 0) {
908 r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen);
911 r = compile_tree_empty_check(qn->target, reg, empty_info);
914 r = add_opcode(reg, OP_STATE_CHECK_PUSH_OR_JUMP);
916 r = add_state_check_num(reg, ckn);
918 r = add_rel_addr(reg,
919 -(mod_tlen + (
int )SIZE_OP_STATE_CHECK_PUSH_OR_JUMP));
922 r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (
int )SIZE_OP_PUSH));
925 else if (qn->upper == 0) {
926 if (qn->is_referred != 0) {
927 r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
929 r = compile_tree(qn->target, reg);
934 else if (qn->upper == 1 && qn->greedy) {
935 if (qn->lower == 0) {
937 r = add_opcode(reg, OP_STATE_CHECK_PUSH);
939 r = add_state_check_num(reg, ckn);
941 r = add_rel_addr(reg, tlen);
944 r = add_opcode_rel_addr(reg, OP_PUSH, tlen);
949 r = compile_tree(qn->target, reg);
951 else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) {
953 r = add_opcode(reg, OP_STATE_CHECK_PUSH);
955 r = add_state_check_num(reg, ckn);
957 r = add_rel_addr(reg, SIZE_OP_JUMP);
960 r = add_opcode_rel_addr(reg, OP_PUSH, SIZE_OP_JUMP);
964 r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
966 r = compile_tree(qn->target, reg);
969 r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg);
972 r = add_opcode(reg, OP_STATE_CHECK);
974 r = add_state_check_num(reg, ckn);
986 int infinite = IS_REPEAT_INFINITE(qn->upper);
987 int empty_info = qn->target_empty_info;
988 int tlen = compile_length_tree(qn->target, reg);
990 if (tlen < 0)
return tlen;
993 if (NTYPE(qn->target) == NT_CANY) {
994 if (qn->greedy && infinite) {
995 if (IS_NOT_NULL(qn->next_head_exact))
996 return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower;
998 return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower;
1002 if (empty_info != 0)
1003 mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
1008 (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
1009 if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {
1013 len = tlen * qn->lower;
1017#ifdef USE_OP_PUSH_OR_JUMP_EXACT
1018 if (IS_NOT_NULL(qn->head_exact))
1019 len += SIZE_OP_PUSH_OR_JUMP_EXACT1 + mod_tlen + SIZE_OP_JUMP;
1022 if (IS_NOT_NULL(qn->next_head_exact))
1023 len += SIZE_OP_PUSH_IF_PEEK_NEXT + mod_tlen + SIZE_OP_JUMP;
1025 len += SIZE_OP_PUSH + mod_tlen + SIZE_OP_JUMP;
1028 len += SIZE_OP_JUMP + mod_tlen + SIZE_OP_PUSH;
1030 else if (qn->upper == 0 && qn->is_referred != 0) {
1031 len = SIZE_OP_JUMP + tlen;
1033 else if (!infinite && qn->greedy &&
1034 (qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper
1035 <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
1036 len = tlen * qn->lower;
1037 len += (SIZE_OP_PUSH + tlen) * (qn->upper - qn->lower);
1039 else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) {
1040 len = SIZE_OP_PUSH + SIZE_OP_JUMP + tlen;
1043 len = SIZE_OP_REPEAT_INC
1044 + mod_tlen + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM;
1054 int infinite = IS_REPEAT_INFINITE(qn->upper);
1055 int empty_info = qn->target_empty_info;
1056 int tlen = compile_length_tree(qn->target, reg);
1058 if (tlen < 0)
return tlen;
1060 if (is_anychar_star_quantifier(qn)) {
1061 r = compile_tree_n_times(qn->target, qn->lower, reg);
1063 if (IS_NOT_NULL(qn->next_head_exact)) {
1064 if (IS_MULTILINE(reg->options))
1065 r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT);
1067 r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT);
1069 return add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
1072 if (IS_MULTILINE(reg->options))
1073 return add_opcode(reg, OP_ANYCHAR_ML_STAR);
1075 return add_opcode(reg, OP_ANYCHAR_STAR);
1079 if (empty_info != 0)
1080 mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
1085 (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
1086 if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {
1088#ifdef USE_OP_PUSH_OR_JUMP_EXACT
1089 if (IS_NOT_NULL(qn->head_exact))
1090 r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_OR_JUMP_EXACT1);
1093 if (IS_NOT_NULL(qn->next_head_exact))
1094 r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_IF_PEEK_NEXT);
1096 r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH);
1099 r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_JUMP);
1104 r = compile_tree_n_times(qn->target, qn->lower, reg);
1109#ifdef USE_OP_PUSH_OR_JUMP_EXACT
1110 if (IS_NOT_NULL(qn->head_exact)) {
1111 r = add_opcode_rel_addr(reg, OP_PUSH_OR_JUMP_EXACT1,
1112 mod_tlen + SIZE_OP_JUMP);
1114 add_bytes(reg, NSTR(qn->head_exact)->s, 1);
1115 r = compile_tree_empty_check(qn->target, reg, empty_info);
1117 r = add_opcode_rel_addr(reg, OP_JUMP,
1118 -(mod_tlen + (
int )SIZE_OP_JUMP + (
int )SIZE_OP_PUSH_OR_JUMP_EXACT1));
1122 if (IS_NOT_NULL(qn->next_head_exact)) {
1123 r = add_opcode_rel_addr(reg, OP_PUSH_IF_PEEK_NEXT,
1124 mod_tlen + SIZE_OP_JUMP);
1126 add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
1127 r = compile_tree_empty_check(qn->target, reg, empty_info);
1129 r = add_opcode_rel_addr(reg, OP_JUMP,
1130 -(mod_tlen + (
int )SIZE_OP_JUMP + (
int )SIZE_OP_PUSH_IF_PEEK_NEXT));
1133 r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP);
1135 r = compile_tree_empty_check(qn->target, reg, empty_info);
1137 r = add_opcode_rel_addr(reg, OP_JUMP,
1138 -(mod_tlen + (
int )SIZE_OP_JUMP + (
int )SIZE_OP_PUSH));
1142 r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen);
1144 r = compile_tree_empty_check(qn->target, reg, empty_info);
1146 r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (
int )SIZE_OP_PUSH));
1149 else if (qn->upper == 0 && qn->is_referred != 0) {
1150 r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
1152 r = compile_tree(qn->target, reg);
1154 else if (!infinite && qn->greedy &&
1155 (qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper
1156 <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
1157 int n = qn->upper - qn->lower;
1159 r = compile_tree_n_times(qn->target, qn->lower, reg);
1162 for (i = 0; i < n; i++) {
1163 r = add_opcode_rel_addr(reg, OP_PUSH,
1164 (n - i) * tlen + (n - i - 1) * SIZE_OP_PUSH);
1166 r = compile_tree(qn->target, reg);
1170 else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) {
1171 r = add_opcode_rel_addr(reg, OP_PUSH, SIZE_OP_JUMP);
1173 r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
1175 r = compile_tree(qn->target, reg);
1178 r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg);
1188 OnigOptionType prev = reg->options;
1190 reg->options = node->option;
1191 tlen = compile_length_tree(node->target, reg);
1192 reg->options = prev;
1194 if (tlen < 0)
return tlen;
1196 if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
1197 return SIZE_OP_SET_OPTION_PUSH + SIZE_OP_SET_OPTION + SIZE_OP_FAIL
1198 + tlen + SIZE_OP_SET_OPTION;
1208 OnigOptionType prev = reg->options;
1210 if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
1211 r = add_opcode_option(reg, OP_SET_OPTION_PUSH, node->option);
1213 r = add_opcode_option(reg, OP_SET_OPTION, prev);
1215 r = add_opcode(reg, OP_FAIL);
1219 reg->options = node->option;
1220 r = compile_tree(node->target, reg);
1221 reg->options = prev;
1223 if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
1225 r = add_opcode_option(reg, OP_SET_OPTION, prev);
1236 if (node->type == ENCLOSE_OPTION)
1237 return compile_length_option_node(node, reg);
1240 tlen = compile_length_tree(node->target, reg);
1241 if (tlen < 0)
return tlen;
1246 switch (node->type) {
1247 case ENCLOSE_MEMORY:
1248#ifdef USE_SUBEXP_CALL
1249 if (IS_ENCLOSE_CALLED(node)) {
1250 len = SIZE_OP_MEMORY_START_PUSH + tlen
1251 + SIZE_OP_CALL + SIZE_OP_JUMP + SIZE_OP_RETURN;
1252 if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
1253 len += (IS_ENCLOSE_RECURSION(node)
1254 ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH);
1256 len += (IS_ENCLOSE_RECURSION(node)
1257 ? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END);
1259 else if (IS_ENCLOSE_RECURSION(node)) {
1260 len = SIZE_OP_MEMORY_START_PUSH;
1261 len += tlen + (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)
1262 ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_REC);
1267 if (BIT_STATUS_AT(reg->bt_mem_start, node->regnum))
1268 len = SIZE_OP_MEMORY_START_PUSH;
1270 len = SIZE_OP_MEMORY_START;
1272 len += tlen + (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)
1273 ? SIZE_OP_MEMORY_END_PUSH : SIZE_OP_MEMORY_END);
1277 case ENCLOSE_STOP_BACKTRACK:
1281#ifndef USE_MATCH_CACHE
1282 if (IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(node)) {
1283 QtfrNode* qn = NQTFR(node->target);
1284 tlen = compile_length_tree(qn->target, reg);
1285 if (tlen < 0)
return tlen;
1287 len = tlen * qn->lower
1288 + SIZE_OP_PUSH + tlen + SIZE_OP_POP + SIZE_OP_JUMP;
1292 len = SIZE_OP_PUSH_STOP_BT + tlen + SIZE_OP_POP_STOP_BT;
1293#ifndef USE_MATCH_CACHE
1298 case ENCLOSE_CONDITION:
1299 len = SIZE_OP_CONDITION;
1300 if (NTYPE(node->target) == NT_ALT) {
1301 Node* x = node->target;
1303 tlen = compile_length_tree(NCAR(x), reg);
1304 if (tlen < 0)
return tlen;
1305 len += tlen + SIZE_OP_JUMP;
1306 if (NCDR(x) == NULL)
return ONIGERR_PARSER_BUG;
1308 tlen = compile_length_tree(NCAR(x), reg);
1309 if (tlen < 0)
return tlen;
1311 if (NCDR(x) != NULL)
return ONIGERR_INVALID_CONDITION_PATTERN;
1314 return ONIGERR_PARSER_BUG;
1318 case ENCLOSE_ABSENT:
1319 len = SIZE_OP_PUSH_ABSENT_POS + SIZE_OP_ABSENT + tlen + SIZE_OP_ABSENT_END;
1323 return ONIGERR_TYPE_BUG;
1330static int get_char_length_tree(
Node* node,
regex_t* reg,
int*
len);
1337 if (node->type == ENCLOSE_OPTION)
1338 return compile_option_node(node, reg);
1340 switch (node->type) {
1341 case ENCLOSE_MEMORY:
1342#ifdef USE_SUBEXP_CALL
1343 if (IS_ENCLOSE_CALLED(node)) {
1344 r = add_opcode(reg, OP_CALL);
1346 node->call_addr = BBUF_GET_OFFSET_POS(reg) + SIZE_ABSADDR + SIZE_OP_JUMP;
1347 node->state |= NST_ADDR_FIXED;
1348 r = add_abs_addr(reg, (
int )node->call_addr);
1350 len = compile_length_tree(node->target, reg);
1351 len += (SIZE_OP_MEMORY_START_PUSH + SIZE_OP_RETURN);
1352 if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
1353 len += (IS_ENCLOSE_RECURSION(node)
1354 ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH);
1356 len += (IS_ENCLOSE_RECURSION(node)
1357 ? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END);
1359 r = add_opcode_rel_addr(reg, OP_JUMP,
len);
1363 if (BIT_STATUS_AT(reg->bt_mem_start, node->regnum))
1364 r = add_opcode(reg, OP_MEMORY_START_PUSH);
1366 r = add_opcode(reg, OP_MEMORY_START);
1368 r = add_mem_num(reg, node->regnum);
1370 r = compile_tree(node->target, reg);
1372#ifdef USE_SUBEXP_CALL
1373 if (IS_ENCLOSE_CALLED(node)) {
1374 if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
1375 r = add_opcode(reg, (IS_ENCLOSE_RECURSION(node)
1376 ? OP_MEMORY_END_PUSH_REC : OP_MEMORY_END_PUSH));
1378 r = add_opcode(reg, (IS_ENCLOSE_RECURSION(node)
1379 ? OP_MEMORY_END_REC : OP_MEMORY_END));
1382 r = add_mem_num(reg, node->regnum);
1384 r = add_opcode(reg, OP_RETURN);
1386 else if (IS_ENCLOSE_RECURSION(node)) {
1387 if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
1388 r = add_opcode(reg, OP_MEMORY_END_PUSH_REC);
1390 r = add_opcode(reg, OP_MEMORY_END_REC);
1392 r = add_mem_num(reg, node->regnum);
1397 if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
1398 r = add_opcode(reg, OP_MEMORY_END_PUSH);
1400 r = add_opcode(reg, OP_MEMORY_END);
1402 r = add_mem_num(reg, node->regnum);
1406 case ENCLOSE_STOP_BACKTRACK:
1410#ifndef USE_MATCH_CACHE
1411 if (IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(node)) {
1412 QtfrNode* qn = NQTFR(node->target);
1413 r = compile_tree_n_times(qn->target, qn->lower, reg);
1416 len = compile_length_tree(qn->target, reg);
1419 r = add_opcode_rel_addr(reg, OP_PUSH,
len + SIZE_OP_POP + SIZE_OP_JUMP);
1421 r = compile_tree(qn->target, reg);
1423 r = add_opcode(reg, OP_POP);
1425 r = add_opcode_rel_addr(reg, OP_JUMP,
1426 -((
int )SIZE_OP_PUSH +
len + (
int )SIZE_OP_POP + (
int )SIZE_OP_JUMP));
1430 r = add_opcode(reg, OP_PUSH_STOP_BT);
1432 r = compile_tree(node->target, reg);
1434 r = add_opcode(reg, OP_POP_STOP_BT);
1435#ifndef USE_MATCH_CACHE
1440 case ENCLOSE_CONDITION:
1441 r = add_opcode(reg, OP_CONDITION);
1443 r = add_mem_num(reg, node->regnum);
1446 if (NTYPE(node->target) == NT_ALT) {
1447 Node* x = node->target;
1450 len = compile_length_tree(NCAR(x), reg);
1452 if (NCDR(x) == NULL)
return ONIGERR_PARSER_BUG;
1454 len2 = compile_length_tree(NCAR(x), reg);
1455 if (len2 < 0)
return len2;
1456 if (NCDR(x) != NULL)
return ONIGERR_INVALID_CONDITION_PATTERN;
1459 r = add_rel_addr(reg,
len + SIZE_OP_JUMP);
1461 r = compile_tree(NCAR(x), reg);
1463 r = add_opcode_rel_addr(reg, OP_JUMP, len2);
1466 r = compile_tree(NCAR(x), reg);
1469 return ONIGERR_PARSER_BUG;
1473 case ENCLOSE_ABSENT:
1474 len = compile_length_tree(node->target, reg);
1477 r = add_opcode(reg, OP_PUSH_ABSENT_POS);
1479 r = add_opcode_rel_addr(reg, OP_ABSENT,
len + SIZE_OP_ABSENT_END);
1481 r = compile_tree(node->target, reg);
1483 r = add_opcode(reg, OP_ABSENT_END);
1487 return ONIGERR_TYPE_BUG;
1501 tlen = compile_length_tree(node->target, reg);
1502 if (tlen < 0)
return tlen;
1505 switch (node->type) {
1506 case ANCHOR_PREC_READ:
1507 len = SIZE_OP_PUSH_POS + tlen + SIZE_OP_POP_POS;
1509 case ANCHOR_PREC_READ_NOT:
1510 len = SIZE_OP_PUSH_POS_NOT + tlen + SIZE_OP_FAIL_POS;
1512 case ANCHOR_LOOK_BEHIND:
1513 len = SIZE_OP_LOOK_BEHIND + tlen;
1515 case ANCHOR_LOOK_BEHIND_NOT:
1516 len = SIZE_OP_PUSH_LOOK_BEHIND_NOT + tlen + SIZE_OP_FAIL_LOOK_BEHIND_NOT;
1532 switch (node->type) {
1533 case ANCHOR_BEGIN_BUF: r = add_opcode(reg, OP_BEGIN_BUF);
break;
1534 case ANCHOR_END_BUF: r = add_opcode(reg, OP_END_BUF);
break;
1535 case ANCHOR_BEGIN_LINE: r = add_opcode(reg, OP_BEGIN_LINE);
break;
1536 case ANCHOR_END_LINE: r = add_opcode(reg, OP_END_LINE);
break;
1537 case ANCHOR_SEMI_END_BUF: r = add_opcode(reg, OP_SEMI_END_BUF);
break;
1538 case ANCHOR_BEGIN_POSITION: r = add_opcode(reg, OP_BEGIN_POSITION);
break;
1540 case ANCHOR_WORD_BOUND:
1541 if (node->ascii_range) r = add_opcode(reg, OP_ASCII_WORD_BOUND);
1542 else r = add_opcode(reg, OP_WORD_BOUND);
1544 case ANCHOR_NOT_WORD_BOUND:
1545 if (node->ascii_range) r = add_opcode(reg, OP_NOT_ASCII_WORD_BOUND);
1546 else r = add_opcode(reg, OP_NOT_WORD_BOUND);
1548#ifdef USE_WORD_BEGIN_END
1549 case ANCHOR_WORD_BEGIN:
1550 if (node->ascii_range) r = add_opcode(reg, OP_ASCII_WORD_BEGIN);
1551 else r = add_opcode(reg, OP_WORD_BEGIN);
1553 case ANCHOR_WORD_END:
1554 if (node->ascii_range) r = add_opcode(reg, OP_ASCII_WORD_END);
1555 else r = add_opcode(reg, OP_WORD_END);
1558 case ANCHOR_KEEP: r = add_opcode(reg, OP_KEEP);
break;
1560 case ANCHOR_PREC_READ:
1561 r = add_opcode(reg, OP_PUSH_POS);
1563 r = compile_tree(node->target, reg);
1565 r = add_opcode(reg, OP_POP_POS);
1568 case ANCHOR_PREC_READ_NOT:
1569 len = compile_length_tree(node->target, reg);
1571 r = add_opcode_rel_addr(reg, OP_PUSH_POS_NOT,
len + SIZE_OP_FAIL_POS);
1573 r = compile_tree(node->target, reg);
1575 r = add_opcode(reg, OP_FAIL_POS);
1578 case ANCHOR_LOOK_BEHIND:
1581 r = add_opcode(reg, OP_LOOK_BEHIND);
1583 if (node->char_len < 0) {
1584 r = get_char_length_tree(node->target, reg, &n);
1585 if (r)
return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
1589 r = add_length(reg, n);
1591 r = compile_tree(node->target, reg);
1595 case ANCHOR_LOOK_BEHIND_NOT:
1598 len = compile_length_tree(node->target, reg);
1599 r = add_opcode_rel_addr(reg, OP_PUSH_LOOK_BEHIND_NOT,
1600 len + SIZE_OP_FAIL_LOOK_BEHIND_NOT);
1602 if (node->char_len < 0) {
1603 r = get_char_length_tree(node->target, reg, &n);
1604 if (r)
return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
1608 r = add_length(reg, n);
1610 r = compile_tree(node->target, reg);
1612 r = add_opcode(reg, OP_FAIL_LOOK_BEHIND_NOT);
1617 return ONIGERR_TYPE_BUG;
1634 r = compile_length_tree(NCAR(node), reg);
1635 if (r < 0)
return r;
1637 }
while (IS_NOT_NULL(node = NCDR(node)));
1646 r = compile_length_tree(NCAR(node), reg);
1647 if (r < 0)
return r;
1650 }
while (IS_NOT_NULL(node = NCDR(node)));
1652 r += (SIZE_OP_PUSH + SIZE_OP_JUMP) * (n - 1);
1657 if (NSTRING_IS_RAW(node))
1658 r = compile_length_string_raw_node(NSTR(node), reg);
1660 r = compile_length_string_node(node, reg);
1664 r = compile_length_cclass_node(NCCLASS(node), reg);
1676#ifdef USE_BACKREF_WITH_LEVEL
1677 if (IS_BACKREF_NEST_LEVEL(br)) {
1678 r = SIZE_OPCODE + SIZE_OPTION + SIZE_LENGTH +
1679 SIZE_LENGTH + (SIZE_MEMNUM * br->back_num);
1683 if (br->back_num == 1) {
1684 r = ((!IS_IGNORECASE(reg->options) && br->back_static[0] <= 2)
1685 ? SIZE_OPCODE : (SIZE_OPCODE + SIZE_MEMNUM));
1688 r = SIZE_OPCODE + SIZE_LENGTH + (SIZE_MEMNUM * br->back_num);
1693#ifdef USE_SUBEXP_CALL
1700 r = compile_length_quantifier_node(NQTFR(node), reg);
1704 r = compile_length_enclose_node(NENCLOSE(node), reg);
1708 r = compile_length_anchor_node(NANCHOR(node), reg);
1712 return ONIGERR_TYPE_BUG;
1728 r = compile_tree(NCAR(node), reg);
1729 }
while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
1737 len += compile_length_tree(NCAR(x), reg);
1738 if (NCDR(x) != NULL) {
1739 len += SIZE_OP_PUSH + SIZE_OP_JUMP;
1741 }
while (IS_NOT_NULL(x = NCDR(x)));
1742 pos = reg->used +
len;
1745 len = compile_length_tree(NCAR(node), reg);
1746 if (IS_NOT_NULL(NCDR(node))) {
1747 r = add_opcode_rel_addr(reg, OP_PUSH,
len + SIZE_OP_JUMP);
1750 r = compile_tree(NCAR(node), reg);
1752 if (IS_NOT_NULL(NCDR(node))) {
1753 len = pos - (reg->used + SIZE_OP_JUMP);
1754 r = add_opcode_rel_addr(reg, OP_JUMP,
len);
1757 }
while (IS_NOT_NULL(node = NCDR(node)));
1762 if (NSTRING_IS_RAW(node))
1763 r = compile_string_raw_node(NSTR(node), reg);
1765 r = compile_string_node(node, reg);
1769 r = compile_cclass_node(NCCLASS(node), reg);
1776 switch (NCTYPE(node)->ctype) {
1777 case ONIGENC_CTYPE_WORD:
1778 if (NCTYPE(node)->ascii_range != 0) {
1779 if (NCTYPE(node)->not != 0) op = OP_NOT_ASCII_WORD;
1780 else op = OP_ASCII_WORD;
1783 if (NCTYPE(node)->not != 0) op = OP_NOT_WORD;
1788 return ONIGERR_TYPE_BUG;
1791 r = add_opcode(reg, op);
1796 if (IS_MULTILINE(reg->options))
1797 r = add_opcode(reg, OP_ANYCHAR_ML);
1799 r = add_opcode(reg, OP_ANYCHAR);
1806#ifdef USE_BACKREF_WITH_LEVEL
1807 if (IS_BACKREF_NEST_LEVEL(br)) {
1808 r = add_opcode(reg, OP_BACKREF_WITH_LEVEL);
1810 r = add_option(reg, (reg->options & ONIG_OPTION_IGNORECASE));
1812 r = add_length(reg, br->nest_level);
1815 goto add_bacref_mems;
1819 if (br->back_num == 1) {
1820 n = br->back_static[0];
1821 if (IS_IGNORECASE(reg->options)) {
1822 r = add_opcode(reg, OP_BACKREFN_IC);
1824 r = add_mem_num(reg, n);
1828 case 1: r = add_opcode(reg, OP_BACKREF1);
break;
1829 case 2: r = add_opcode(reg, OP_BACKREF2);
break;
1831 r = add_opcode(reg, OP_BACKREFN);
1833 r = add_mem_num(reg, n);
1842 if (IS_IGNORECASE(reg->options)) {
1843 r = add_opcode(reg, OP_BACKREF_MULTI_IC);
1846 r = add_opcode(reg, OP_BACKREF_MULTI);
1850#ifdef USE_BACKREF_WITH_LEVEL
1853 r = add_length(reg, br->back_num);
1856 for (i = br->back_num - 1; i >= 0; i--) {
1857 r = add_mem_num(reg, p[i]);
1864#ifdef USE_SUBEXP_CALL
1866 r = compile_call(NCALL(node), reg);
1871 r = compile_quantifier_node(NQTFR(node), reg);
1875 r = compile_enclose_node(NENCLOSE(node), reg);
1879 r = compile_anchor_node(NANCHOR(node), reg);
1884 fprintf(stderr,
"compile_tree: undefined node type %d\n", NTYPE(node));
1892#ifdef USE_NAMED_GROUP
1898 Node* node = *plink;
1900 switch (NTYPE(node)) {
1904 r = noname_disable_map(&(NCAR(node)), map, counter);
1905 }
while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
1910 Node** ptarget = &(NQTFR(node)->target);
1911 Node* old = *ptarget;
1912 r = noname_disable_map(ptarget, map, counter);
1913 if (*ptarget != old && NTYPE(*ptarget) == NT_QTFR) {
1914 onig_reduce_nested_quantifier(node, *ptarget);
1922 if (en->type == ENCLOSE_MEMORY) {
1923 if (IS_ENCLOSE_NAMED_GROUP(en)) {
1925 map[en->regnum].new_val = *counter;
1926 en->regnum = *counter;
1928 else if (en->regnum != 0) {
1929 *plink = en->target;
1930 en->target = NULL_NODE;
1931 onig_node_free(node);
1932 r = noname_disable_map(plink, map, counter);
1936 r = noname_disable_map(&(en->target), map, counter);
1941 if (NANCHOR(node)->target)
1942 r = noname_disable_map(&(NANCHOR(node)->target), map, counter);
1955 int i, pos, n, old_num;
1959 if (! IS_BACKREF_NAME_REF(bn))
1960 return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
1962 old_num = bn->back_num;
1963 if (IS_NULL(bn->back_dynamic))
1964 backs = bn->back_static;
1966 backs = bn->back_dynamic;
1968 for (i = 0, pos = 0; i < old_num; i++) {
1969 if (backs[i] > num_mem)
return ONIGERR_INVALID_BACKREF;
1970 n = map[backs[i]].new_val;
1986 switch (NTYPE(node)) {
1990 r = renumber_by_map(NCAR(node), map, num_mem);
1991 }
while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
1994 r = renumber_by_map(NQTFR(node)->target, map, num_mem);
1999 if (en->type == ENCLOSE_CONDITION) {
2000 if (en->regnum > num_mem)
return ONIGERR_INVALID_BACKREF;
2001 en->regnum = map[en->regnum].new_val;
2003 r = renumber_by_map(en->target, map, num_mem);
2008 r = renumber_node_backref(node, map, num_mem);
2012 if (NANCHOR(node)->target)
2013 r = renumber_by_map(NANCHOR(node)->target, map, num_mem);
2024numbered_ref_check(
Node* node)
2028 switch (NTYPE(node)) {
2032 r = numbered_ref_check(NCAR(node));
2033 }
while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2036 r = numbered_ref_check(NQTFR(node)->target);
2039 r = numbered_ref_check(NENCLOSE(node)->target);
2043 if (! IS_BACKREF_NAME_REF(NBREF(node)))
2044 return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
2048 if (NANCHOR(node)->target)
2049 r = numbered_ref_check(NANCHOR(node)->target);
2062 int r, i, pos, counter;
2067 CHECK_NULL_RETURN_MEMERR(map);
2068 for (i = 1; i <= env->num_mem; i++) {
2072 r = noname_disable_map(root, map, &counter);
2073 if (r != 0)
return r;
2075 r = renumber_by_map(*root, map, env->num_mem);
2076 if (r != 0)
return r;
2078 for (i = 1, pos = 1; i <= env->num_mem; i++) {
2079 if (map[i].new_val > 0) {
2080 SCANENV_MEM_NODES(env)[pos] = SCANENV_MEM_NODES(env)[i];
2085 loc = env->capture_history;
2086 BIT_STATUS_CLEAR(env->capture_history);
2087 for (i = 1; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) {
2088 if (BIT_STATUS_AT(loc, i)) {
2089 BIT_STATUS_ON_AT_SIMPLE(env->capture_history, map[i].new_val);
2093 env->num_mem = env->num_named;
2094 reg->num_mem = env->num_named;
2096 return onig_renumber_name_table(reg, map);
2100#ifdef USE_SUBEXP_CALL
2108 for (i = 0; i < uslist->num; i++) {
2109 en = NENCLOSE(uslist->us[i].target);
2110 if (! IS_ENCLOSE_ADDR_FIXED(en))
return ONIGERR_PARSER_BUG;
2111 addr = en->call_addr;
2112 offset = uslist->us[i].offset;
2114 BBUF_WRITE(reg, offset, &addr, SIZE_ABSADDR);
2120#ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
2122quantifiers_memory_node_info(
Node* node)
2126 switch (NTYPE(node)) {
2132 v = quantifiers_memory_node_info(NCAR(node));
2134 }
while (v >= 0 && IS_NOT_NULL(node = NCDR(node)));
2138# ifdef USE_SUBEXP_CALL
2140 if (IS_CALL_RECURSION(NCALL(node))) {
2141 return NQ_TARGET_IS_EMPTY_REC;
2144 r = quantifiers_memory_node_info(NCALL(node)->target);
2151 if (qn->upper != 0) {
2152 r = quantifiers_memory_node_info(qn->target);
2161 case ENCLOSE_MEMORY:
2162 return NQ_TARGET_IS_EMPTY_MEM;
2165 case ENCLOSE_OPTION:
2166 case ENCLOSE_STOP_BACKTRACK:
2167 case ENCLOSE_CONDITION:
2168 case ENCLOSE_ABSENT:
2169 r = quantifiers_memory_node_info(en->target);
2192get_min_match_length(
Node* node, OnigDistance *min,
ScanEnv* env)
2198 switch (NTYPE(node)) {
2203 Node** nodes = SCANENV_MEM_NODES(env);
2205 if (br->state & NST_RECURSION)
break;
2207 backs = BACKREFS_P(br);
2208 if (backs[0] > env->num_mem)
return ONIGERR_INVALID_BACKREF;
2209 r = get_min_match_length(nodes[backs[0]], min, env);
2211 for (i = 1; i < br->back_num; i++) {
2212 if (backs[i] > env->num_mem)
return ONIGERR_INVALID_BACKREF;
2213 r = get_min_match_length(nodes[backs[i]], &tmin, env);
2215 if (*min > tmin) *min = tmin;
2220#ifdef USE_SUBEXP_CALL
2222 if (IS_CALL_RECURSION(NCALL(node))) {
2224 if (IS_ENCLOSE_MIN_FIXED(en))
2228 r = get_min_match_length(NCALL(node)->target, min, env);
2234 r = get_min_match_length(NCAR(node), &tmin, env);
2235 if (r == 0) *min += tmin;
2236 }
while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2245 r = get_min_match_length(x, &tmin, env);
2247 if (y == node) *min = tmin;
2248 else if (*min > tmin) *min = tmin;
2249 }
while (r == 0 && IS_NOT_NULL(y = NCDR(y)));
2256 *min = sn->end - sn->s;
2273 if (qn->lower > 0) {
2274 r = get_min_match_length(qn->target, min, env);
2276 *min = distance_multiply(*min, qn->lower);
2285 case ENCLOSE_MEMORY:
2286 if (IS_ENCLOSE_MIN_FIXED(en))
2289 if (IS_ENCLOSE_MARK1(NENCLOSE(node)))
2292 SET_ENCLOSE_STATUS(node, NST_MARK1);
2293 r = get_min_match_length(en->target, min, env);
2294 CLEAR_ENCLOSE_STATUS(node, NST_MARK1);
2297 SET_ENCLOSE_STATUS(node, NST_MIN_FIXED);
2303 case ENCLOSE_OPTION:
2304 case ENCLOSE_STOP_BACKTRACK:
2305 case ENCLOSE_CONDITION:
2306 r = get_min_match_length(en->target, min, env);
2309 case ENCLOSE_ABSENT:
2324get_max_match_length(
Node* node, OnigDistance *max,
ScanEnv* env)
2330 switch (NTYPE(node)) {
2333 r = get_max_match_length(NCAR(node), &tmax, env);
2335 *max = distance_add(*max, tmax);
2336 }
while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2341 r = get_max_match_length(NCAR(node), &tmax, env);
2342 if (r == 0 && *max < tmax) *max = tmax;
2343 }
while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2349 *max = sn->end - sn->s;
2354 *max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
2359 *max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
2366 Node** nodes = SCANENV_MEM_NODES(env);
2368 if (br->state & NST_RECURSION) {
2369 *max = ONIG_INFINITE_DISTANCE;
2372 backs = BACKREFS_P(br);
2373 for (i = 0; i < br->back_num; i++) {
2374 if (backs[i] > env->num_mem)
return ONIGERR_INVALID_BACKREF;
2375 r = get_max_match_length(nodes[backs[i]], &tmax, env);
2377 if (*max < tmax) *max = tmax;
2382#ifdef USE_SUBEXP_CALL
2384 if (! IS_CALL_RECURSION(NCALL(node)))
2385 r = get_max_match_length(NCALL(node)->target, max, env);
2387 *max = ONIG_INFINITE_DISTANCE;
2395 if (qn->upper != 0) {
2396 r = get_max_match_length(qn->target, max, env);
2397 if (r == 0 && *max != 0) {
2398 if (! IS_REPEAT_INFINITE(qn->upper))
2399 *max = distance_multiply(*max, qn->upper);
2401 *max = ONIG_INFINITE_DISTANCE;
2411 case ENCLOSE_MEMORY:
2412 if (IS_ENCLOSE_MAX_FIXED(en))
2415 if (IS_ENCLOSE_MARK1(NENCLOSE(node)))
2416 *max = ONIG_INFINITE_DISTANCE;
2418 SET_ENCLOSE_STATUS(node, NST_MARK1);
2419 r = get_max_match_length(en->target, max, env);
2420 CLEAR_ENCLOSE_STATUS(node, NST_MARK1);
2423 SET_ENCLOSE_STATUS(node, NST_MAX_FIXED);
2429 case ENCLOSE_OPTION:
2430 case ENCLOSE_STOP_BACKTRACK:
2431 case ENCLOSE_CONDITION:
2432 r = get_max_match_length(en->target, max, env);
2435 case ENCLOSE_ABSENT:
2449#define GET_CHAR_LEN_VARLEN -1
2450#define GET_CHAR_LEN_TOP_ALT_VARLEN -2
2454get_char_length_tree1(
Node* node,
regex_t* reg,
int*
len,
int level)
2461 switch (NTYPE(node)) {
2464 r = get_char_length_tree1(NCAR(node), reg, &tlen, level);
2466 *
len = (int )distance_add(*
len, tlen);
2467 }
while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2475 r = get_char_length_tree1(NCAR(node), reg, &tlen, level);
2476 while (r == 0 && IS_NOT_NULL(node = NCDR(node))) {
2477 r = get_char_length_tree1(NCAR(node), reg, &tlen2, level);
2486 r = GET_CHAR_LEN_TOP_ALT_VARLEN;
2488 r = GET_CHAR_LEN_VARLEN;
2500 while (s < sn->end) {
2501 s += enclen(reg->enc, s, sn->end);
2510 if (qn->lower == qn->upper) {
2511 r = get_char_length_tree1(qn->target, reg, &tlen, level);
2513 *
len = (int )distance_multiply(tlen, qn->lower);
2516 r = GET_CHAR_LEN_VARLEN;
2520#ifdef USE_SUBEXP_CALL
2522 if (! IS_CALL_RECURSION(NCALL(node)))
2523 r = get_char_length_tree1(NCALL(node)->target, reg,
len, level);
2525 r = GET_CHAR_LEN_VARLEN;
2542 case ENCLOSE_MEMORY:
2543#ifdef USE_SUBEXP_CALL
2544 if (IS_ENCLOSE_CLEN_FIXED(en))
2545 *
len = en->char_len;
2547 r = get_char_length_tree1(en->target, reg,
len, level);
2549 en->char_len = *
len;
2550 SET_ENCLOSE_STATUS(node, NST_CLEN_FIXED);
2555 case ENCLOSE_OPTION:
2556 case ENCLOSE_STOP_BACKTRACK:
2557 case ENCLOSE_CONDITION:
2558 r = get_char_length_tree1(en->target, reg,
len, level);
2560 case ENCLOSE_ABSENT:
2571 r = GET_CHAR_LEN_VARLEN;
2581 return get_char_length_tree1(node, reg,
len, 0);
2601 if (NCTYPE(y)->ctype == NCTYPE(x)->ctype &&
2602 NCTYPE(y)->not != NCTYPE(x)->not &&
2603 NCTYPE(y)->ascii_range == NCTYPE(x)->ascii_range)
2613 tmp = x; x = y; y = tmp;
2633 switch (NCTYPE(y)->ctype) {
2634 case ONIGENC_CTYPE_WORD:
2635 if (NCTYPE(y)->not == 0) {
2636 if (IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) {
2637 for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
2638 if (BITSET_AT(xc->bs, i)) {
2639 if (NCTYPE(y)->ascii_range) {
2640 if (IS_CODE_SB_WORD(reg->enc, i))
return 0;
2643 if (ONIGENC_IS_CODE_WORD(reg->enc, i))
return 0;
2652 if (IS_NOT_NULL(xc->mbuf))
return 0;
2653 for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
2655 if (NCTYPE(y)->ascii_range)
2656 is_word = IS_CODE_SB_WORD(reg->enc, i);
2658 is_word = ONIGENC_IS_CODE_WORD(reg->enc, i);
2660 if (!IS_NCCLASS_NOT(xc)) {
2661 if (BITSET_AT(xc->bs, i))
2665 if (! BITSET_AT(xc->bs, i))
2684 for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
2685 v = BITSET_AT(xc->bs, i);
2686 if ((v != 0 && !IS_NCCLASS_NOT(xc)) ||
2687 (v == 0 && IS_NCCLASS_NOT(xc))) {
2688 v = BITSET_AT(yc->bs, i);
2689 if ((v != 0 && !IS_NCCLASS_NOT(yc)) ||
2690 (v == 0 && IS_NCCLASS_NOT(yc)))
2694 if ((IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) ||
2695 (IS_NULL(yc->mbuf) && !IS_NCCLASS_NOT(yc)))
2714 if (NSTRING_LEN(x) == 0)
2719 switch (NCTYPE(y)->ctype) {
2720 case ONIGENC_CTYPE_WORD:
2721 if (NCTYPE(y)->ascii_range) {
2722 if (ONIGENC_IS_MBC_ASCII_WORD(reg->enc, xs->s, xs->end))
2723 return NCTYPE(y)->not;
2725 return !(NCTYPE(y)->not);
2728 if (ONIGENC_IS_MBC_WORD(reg->enc, xs->s, xs->end))
2729 return NCTYPE(y)->not;
2731 return !(NCTYPE(y)->not);
2743 code = ONIGENC_MBC_TO_CODE(reg->enc, xs->s,
2744 xs->s + ONIGENC_MBC_MAXLEN(reg->enc));
2745 return (onig_is_code_in_cc(reg->enc, code, cc) != 0 ? 0 : 1);
2753 len = NSTRING_LEN(x);
2754 if (
len > NSTRING_LEN(y))
len = NSTRING_LEN(y);
2755 if (NSTRING_IS_AMBIG(x) || NSTRING_IS_AMBIG(y)) {
2760 for (i = 0, p = ys->s, q = xs->s; (OnigDistance )i <
len; i++, p++, q++) {
2761 if (*p != *q)
return 1;
2781get_head_value_node(
Node* node,
int exact,
regex_t* reg)
2783 Node* n = NULL_NODE;
2785 switch (NTYPE(node)) {
2789#ifdef USE_SUBEXP_CALL
2802 n = get_head_value_node(NCAR(node), exact, reg);
2808 if (sn->end <= sn->s)
2812 NSTRING_IS_RAW(node) || !IS_IGNORECASE(reg->options)) {
2821 if (qn->lower > 0) {
2822#ifdef USE_OP_PUSH_OR_JUMP_EXACT
2823 if (IS_NOT_NULL(qn->head_exact))
2827 n = get_head_value_node(qn->target, exact, reg);
2836 case ENCLOSE_OPTION:
2838 OnigOptionType options = reg->options;
2840 reg->options = NENCLOSE(node)->option;
2841 n = get_head_value_node(NENCLOSE(node)->target, exact, reg);
2842 reg->options = options;
2846 case ENCLOSE_MEMORY:
2847 case ENCLOSE_STOP_BACKTRACK:
2848 case ENCLOSE_CONDITION:
2849 n = get_head_value_node(en->target, exact, reg);
2852 case ENCLOSE_ABSENT:
2859 if (NANCHOR(node)->type == ANCHOR_PREC_READ)
2860 n = get_head_value_node(NANCHOR(node)->target, exact, reg);
2871check_type_tree(
Node* node,
int type_mask,
int enclose_mask,
int anchor_mask)
2876 if ((NTYPE2BIT(type) & type_mask) == 0)
2883 r = check_type_tree(NCAR(node), type_mask, enclose_mask,
2885 }
while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2889 r = check_type_tree(NQTFR(node)->target, type_mask, enclose_mask,
2896 if ((en->type & enclose_mask) == 0)
2899 r = check_type_tree(en->target, type_mask, enclose_mask, anchor_mask);
2904 type = NANCHOR(node)->type;
2905 if ((type & anchor_mask) == 0)
2908 if (NANCHOR(node)->target)
2909 r = check_type_tree(NANCHOR(node)->target,
2910 type_mask, enclose_mask, anchor_mask);
2919#ifdef USE_SUBEXP_CALL
2921# define RECURSION_EXIST 1
2922# define RECURSION_INFINITE 2
2925subexp_inf_recursive_check(
Node* node,
ScanEnv* env,
int head)
2940 ret = subexp_inf_recursive_check(NCAR(x), env, head);
2941 if (ret < 0 || ret == RECURSION_INFINITE)
return ret;
2944 ret = get_min_match_length(NCAR(x), &min, env);
2945 if (ret != 0)
return ret;
2946 if (min != 0) head = 0;
2948 }
while (IS_NOT_NULL(x = NCDR(x)));
2955 r = RECURSION_EXIST;
2957 ret = subexp_inf_recursive_check(NCAR(node), env, head);
2958 if (ret < 0 || ret == RECURSION_INFINITE)
return ret;
2960 }
while (IS_NOT_NULL(node = NCDR(node)));
2965 r = subexp_inf_recursive_check(NQTFR(node)->target, env, head);
2966 if (r == RECURSION_EXIST) {
2967 if (NQTFR(node)->lower == 0) r = 0;
2975 case ANCHOR_PREC_READ:
2976 case ANCHOR_PREC_READ_NOT:
2977 case ANCHOR_LOOK_BEHIND:
2978 case ANCHOR_LOOK_BEHIND_NOT:
2979 r = subexp_inf_recursive_check(an->target, env, head);
2986 r = subexp_inf_recursive_check(NCALL(node)->target, env, head);
2990 if (IS_ENCLOSE_MARK2(NENCLOSE(node)))
2992 else if (IS_ENCLOSE_MARK1(NENCLOSE(node)))
2993 return (head == 0 ? RECURSION_EXIST : RECURSION_INFINITE);
2995 SET_ENCLOSE_STATUS(node, NST_MARK2);
2996 r = subexp_inf_recursive_check(NENCLOSE(node)->target, env, head);
2997 CLEAR_ENCLOSE_STATUS(node, NST_MARK2);
3009subexp_inf_recursive_check_trav(
Node* node,
ScanEnv* env)
3019 r = subexp_inf_recursive_check_trav(NCAR(node), env);
3020 }
while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
3024 r = subexp_inf_recursive_check_trav(NQTFR(node)->target, env);
3031 case ANCHOR_PREC_READ:
3032 case ANCHOR_PREC_READ_NOT:
3033 case ANCHOR_LOOK_BEHIND:
3034 case ANCHOR_LOOK_BEHIND_NOT:
3035 r = subexp_inf_recursive_check_trav(an->target, env);
3045 if (IS_ENCLOSE_RECURSION(en)) {
3046 SET_ENCLOSE_STATUS(node, NST_MARK1);
3047 r = subexp_inf_recursive_check(en->target, env, 1);
3048 if (r > 0)
return ONIGERR_NEVER_ENDING_RECURSION;
3049 CLEAR_ENCLOSE_STATUS(node, NST_MARK1);
3051 r = subexp_inf_recursive_check_trav(en->target, env);
3064subexp_recursive_check(
Node* node)
3068 switch (NTYPE(node)) {
3072 r |= subexp_recursive_check(NCAR(node));
3073 }
while (IS_NOT_NULL(node = NCDR(node)));
3077 r = subexp_recursive_check(NQTFR(node)->target);
3084 case ANCHOR_PREC_READ:
3085 case ANCHOR_PREC_READ_NOT:
3086 case ANCHOR_LOOK_BEHIND:
3087 case ANCHOR_LOOK_BEHIND_NOT:
3088 r = subexp_recursive_check(an->target);
3095 r = subexp_recursive_check(NCALL(node)->target);
3096 if (r != 0) SET_CALL_RECURSION(node);
3100 if (IS_ENCLOSE_MARK2(NENCLOSE(node)))
3102 else if (IS_ENCLOSE_MARK1(NENCLOSE(node)))
3105 SET_ENCLOSE_STATUS(node, NST_MARK2);
3106 r = subexp_recursive_check(NENCLOSE(node)->target);
3107 CLEAR_ENCLOSE_STATUS(node, NST_MARK2);
3120subexp_recursive_check_trav(
Node* node,
ScanEnv* env)
3122# define FOUND_CALLED_NODE 1
3134 ret = subexp_recursive_check_trav(NCAR(node), env);
3135 if (ret == FOUND_CALLED_NODE) r = FOUND_CALLED_NODE;
3136 else if (ret < 0)
return ret;
3137 }
while (IS_NOT_NULL(node = NCDR(node)));
3142 r = subexp_recursive_check_trav(NQTFR(node)->target, env);
3143 if (NQTFR(node)->upper == 0) {
3144 if (r == FOUND_CALLED_NODE)
3145 NQTFR(node)->is_referred = 1;
3153 case ANCHOR_PREC_READ:
3154 case ANCHOR_PREC_READ_NOT:
3155 case ANCHOR_LOOK_BEHIND:
3156 case ANCHOR_LOOK_BEHIND_NOT:
3157 r = subexp_recursive_check_trav(an->target, env);
3167 if (! IS_ENCLOSE_RECURSION(en)) {
3168 if (IS_ENCLOSE_CALLED(en)) {
3169 SET_ENCLOSE_STATUS(node, NST_MARK1);
3170 r = subexp_recursive_check(en->target);
3171 if (r != 0) SET_ENCLOSE_STATUS(node, NST_RECURSION);
3172 CLEAR_ENCLOSE_STATUS(node, NST_MARK1);
3175 r = subexp_recursive_check_trav(en->target, env);
3176 if (IS_ENCLOSE_CALLED(en))
3177 r |= FOUND_CALLED_NODE;
3198 r = setup_subexp_call(NCAR(node), env);
3199 }
while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
3204 r = setup_subexp_call(NCAR(node), env);
3205 }
while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
3209 r = setup_subexp_call(NQTFR(node)->target, env);
3212 r = setup_subexp_call(NENCLOSE(node)->target, env);
3218 Node** nodes = SCANENV_MEM_NODES(env);
3220 if (cn->group_num != 0) {
3221 int gnum = cn->group_num;
3223# ifdef USE_NAMED_GROUP
3224 if (env->num_named > 0 &&
3225 IS_SYNTAX_BV(env->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
3226 !ONIG_IS_OPTION_ON(env->option, ONIG_OPTION_CAPTURE_GROUP)) {
3227 return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
3230 if (gnum > env->num_mem) {
3231 onig_scan_env_set_error_string(env,
3232 ONIGERR_UNDEFINED_GROUP_REFERENCE, cn->name, cn->name_end);
3233 return ONIGERR_UNDEFINED_GROUP_REFERENCE;
3236# ifdef USE_NAMED_GROUP
3239 cn->target = nodes[cn->group_num];
3240 if (IS_NULL(cn->target)) {
3241 onig_scan_env_set_error_string(env,
3242 ONIGERR_UNDEFINED_NAME_REFERENCE, cn->name, cn->name_end);
3243 return ONIGERR_UNDEFINED_NAME_REFERENCE;
3245 SET_ENCLOSE_STATUS(cn->target, NST_CALLED);
3246 BIT_STATUS_ON_AT(env->bt_mem_start, cn->group_num);
3247 cn->unset_addr_list = env->unset_addr_list;
3249# ifdef USE_NAMED_GROUP
3250# ifdef USE_PERL_SUBEXP_CALL
3251 else if (cn->name == cn->name_end) {
3258 int n = onig_name_to_group_numbers(env->reg, cn->name, cn->name_end,
3261 onig_scan_env_set_error_string(env,
3262 ONIGERR_UNDEFINED_NAME_REFERENCE, cn->name, cn->name_end);
3263 return ONIGERR_UNDEFINED_NAME_REFERENCE;
3266 ! IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME_CALL)) {
3267 onig_scan_env_set_error_string(env,
3268 ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL, cn->name, cn->name_end);
3269 return ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL;
3272 cn->group_num = refs[0];
3285 case ANCHOR_PREC_READ:
3286 case ANCHOR_PREC_READ_NOT:
3287 case ANCHOR_LOOK_BEHIND:
3288 case ANCHOR_LOOK_BEHIND_NOT:
3289 r = setup_subexp_call(an->target, env);
3303#define IN_ALT (1<<0)
3304#define IN_NOT (1<<1)
3305#define IN_REPEAT (1<<2)
3306#define IN_VAR_REPEAT (1<<3)
3307#define IN_CALL (1<<4)
3308#define IN_RECCALL (1<<5)
3309#define IN_LOOK_BEHIND (1<<6)
3316divide_look_behind_alternatives(
Node* node)
3318 Node *head, *np, *insert_node;
3320 int anc_type = an->type;
3324 swap_node(node, head);
3326 NANCHOR(head)->target = np;
3329 while ((np = NCDR(np)) != NULL_NODE) {
3330 insert_node = onig_node_new_anchor(anc_type);
3331 CHECK_NULL_RETURN_MEMERR(insert_node);
3332 NANCHOR(insert_node)->target = NCAR(np);
3333 NCAR(np) = insert_node;
3336 if (anc_type == ANCHOR_LOOK_BEHIND_NOT) {
3339 SET_NTYPE(np, NT_LIST);
3340 }
while ((np = NCDR(np)) != NULL_NODE);
3351 r = get_char_length_tree(an->target, reg, &
len);
3354 else if (r == GET_CHAR_LEN_VARLEN)
3355 r = ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
3356 else if (r == GET_CHAR_LEN_TOP_ALT_VARLEN) {
3357 if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND))
3358 r = divide_look_behind_alternatives(node);
3360 r = ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
3373 if (type == NT_QTFR) {
3375 if (qn->greedy && IS_REPEAT_INFINITE(qn->upper)) {
3376#ifdef USE_QTFR_PEEK_NEXT
3377 Node* n = get_head_value_node(next_node, 1, reg);
3379 if (IS_NOT_NULL(n) && NSTR(n)->s[0] !=
'\0') {
3380 qn->next_head_exact = n;
3384 if (qn->lower <= 1) {
3385 int ttype = NTYPE(qn->target);
3386 if (IS_NODE_TYPE_SIMPLE(ttype)) {
3388 x = get_head_value_node(qn->target, 0, reg);
3389 if (IS_NOT_NULL(x)) {
3390 y = get_head_value_node(next_node, 0, reg);
3391 if (IS_NOT_NULL(y) && is_not_included(x, y, reg)) {
3392 Node* en = onig_node_new_enclose(ENCLOSE_STOP_BACKTRACK);
3393 CHECK_NULL_RETURN_MEMERR(en);
3394 SET_ENCLOSE_STATUS(en, NST_STOP_BT_SIMPLE_REPEAT);
3395 swap_node(node, en);
3396 NENCLOSE(node)->target = en;
3403 else if (type == NT_ENCLOSE) {
3405 if (en->type == ENCLOSE_MEMORY && !IS_ENCLOSE_CALLED(en)) {
3415update_string_node_case_fold(
regex_t* reg,
Node *node)
3417 UChar *p, *end, buf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
3418 UChar *sbuf, *ebuf, *sp;
3420 OnigDistance sbuf_size;
3424 sbuf_size = (end - sn->s) * 2;
3425 sbuf = (UChar* )
xmalloc(sbuf_size);
3426 CHECK_NULL_RETURN_MEMERR(sbuf);
3427 ebuf = sbuf + sbuf_size;
3432 len = ONIGENC_MBC_CASE_FOLD(reg->enc, reg->case_fold_flag, &p, end, buf);
3433 for (i = 0; i <
len; i++) {
3435 UChar* p = (UChar* )
xrealloc(sbuf, sbuf_size * 2);
3438 return ONIGERR_MEMORY;
3441 sp = sbuf + sbuf_size;
3443 ebuf = sbuf + sbuf_size;
3450 r = onig_node_str_set(node, sbuf, sp);
3457expand_case_fold_make_rem_string(
Node** rnode, UChar *s, UChar *end,
3463 node = onig_node_new_str(s, end);
3464 if (IS_NULL(node))
return ONIGERR_MEMORY;
3466 r = update_string_node_case_fold(reg, node);
3468 onig_node_free(node);
3472 NSTRING_SET_AMBIG(node);
3473 NSTRING_SET_DONT_GET_OPT_INFO(node);
3484 for (i = 0; i < item_num; i++) {
3485 if (items[i].byte_len != slen) {
3488 if (items[i].code_len != 1) {
3497 UChar *p,
int slen, UChar *end,
3500 int r, i, j,
len, varlen;
3501 Node *anode, *var_anode, *snode, *xnode, *an;
3502 UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
3504 *rnode = var_anode = NULL_NODE;
3507 for (i = 0; i < item_num; i++) {
3508 if (items[i].byte_len != slen) {
3515 *rnode = var_anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
3516 if (IS_NULL(var_anode))
return ONIGERR_MEMORY;
3518 xnode = onig_node_new_list(NULL, NULL);
3519 if (IS_NULL(xnode))
goto mem_err;
3520 NCAR(var_anode) = xnode;
3522 anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
3523 if (IS_NULL(anode))
goto mem_err;
3524 NCAR(xnode) = anode;
3527 *rnode = anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
3528 if (IS_NULL(anode))
return ONIGERR_MEMORY;
3531 snode = onig_node_new_str(p, p + slen);
3532 if (IS_NULL(snode))
goto mem_err;
3534 NCAR(anode) = snode;
3536 for (i = 0; i < item_num; i++) {
3537 snode = onig_node_new_str(NULL, NULL);
3538 if (IS_NULL(snode))
goto mem_err;
3540 for (j = 0; j < items[i].code_len; j++) {
3541 len = ONIGENC_CODE_TO_MBC(reg->enc, items[i].code[j], buf);
3547 r = onig_node_str_cat(snode, buf, buf +
len);
3548 if (r != 0)
goto mem_err2;
3551 an = onig_node_new_alt(NULL_NODE, NULL_NODE);
3556 if (items[i].byte_len != slen) {
3558 UChar *q = p + items[i].byte_len;
3561 r = expand_case_fold_make_rem_string(&rem, q, end, reg);
3567 xnode = onig_node_list_add(NULL_NODE, snode);
3568 if (IS_NULL(xnode)) {
3570 onig_node_free(rem);
3573 if (IS_NULL(onig_node_list_add(xnode, rem))) {
3575 onig_node_free(xnode);
3576 onig_node_free(rem);
3586 NCDR(var_anode) = an;
3599 onig_node_free(snode);
3602 onig_node_free(*rnode);
3604 return ONIGERR_MEMORY;
3607#define THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION 8
3610expand_case_fold_string(
Node* node,
regex_t* reg,
int state)
3612 int r, n,
len, alt_num;
3614 int is_in_look_behind;
3615 UChar *start, *end, *p;
3616 Node *top_root, *root, *snode, *prev_node;
3620 if (NSTRING_IS_AMBIG(node))
return 0;
3626 if (start >= end)
return 0;
3628 is_in_look_behind = (state & IN_LOOK_BEHIND) != 0;
3631 top_root = root = prev_node = snode = NULL_NODE;
3635 n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(reg->enc, reg->case_fold_flag,
3642 len = enclen(reg->enc, p, end);
3644 varlen = is_case_fold_variable_len(n, items,
len);
3645 if (n == 0 || varlen == 0 || is_in_look_behind) {
3646 if (IS_NULL(snode)) {
3647 if (IS_NULL(root) && IS_NOT_NULL(prev_node)) {
3648 onig_node_free(top_root);
3649 top_root = root = onig_node_list_add(NULL_NODE, prev_node);
3650 if (IS_NULL(root)) {
3651 onig_node_free(prev_node);
3656 prev_node = snode = onig_node_new_str(NULL, NULL);
3657 if (IS_NULL(snode))
goto mem_err;
3658 if (IS_NOT_NULL(root)) {
3659 if (IS_NULL(onig_node_list_add(root, snode))) {
3660 onig_node_free(snode);
3666 r = onig_node_str_cat(snode, p, p +
len);
3667 if (r != 0)
goto err;
3671 if (alt_num > THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION)
break;
3673 if (IS_NOT_NULL(snode)) {
3674 r = update_string_node_case_fold(reg, snode);
3676 NSTRING_SET_AMBIG(snode);
3679 if (IS_NULL(root) && IS_NOT_NULL(prev_node)) {
3680 onig_node_free(top_root);
3681 top_root = root = onig_node_list_add(NULL_NODE, prev_node);
3682 if (IS_NULL(root)) {
3683 onig_node_free(prev_node);
3688 r = expand_case_fold_string_alt(n, items, p,
len, end, reg, &prev_node);
3689 if (r < 0)
goto mem_err;
3691 if (IS_NULL(root)) {
3692 top_root = prev_node;
3695 if (IS_NULL(onig_node_list_add(root, prev_node))) {
3696 onig_node_free(prev_node);
3701 root = NCAR(prev_node);
3704 if (IS_NOT_NULL(root)) {
3705 if (IS_NULL(onig_node_list_add(root, prev_node))) {
3706 onig_node_free(prev_node);
3717 if (IS_NOT_NULL(snode)) {
3718 r = update_string_node_case_fold(reg, snode);
3720 NSTRING_SET_AMBIG(snode);
3727 r = expand_case_fold_make_rem_string(&srem, p, end, reg);
3728 if (r != 0)
goto mem_err;
3730 if (IS_NOT_NULL(prev_node) && IS_NULL(root)) {
3731 onig_node_free(top_root);
3732 top_root = root = onig_node_list_add(NULL_NODE, prev_node);
3733 if (IS_NULL(root)) {
3734 onig_node_free(srem);
3735 onig_node_free(prev_node);
3740 if (IS_NULL(root)) {
3744 if (IS_NULL(onig_node_list_add(root, srem))) {
3745 onig_node_free(srem);
3752 top_root = (IS_NOT_NULL(top_root) ? top_root : prev_node);
3753 swap_node(node, top_root);
3754 onig_node_free(top_root);
3761 onig_node_free(top_root);
3766#ifdef USE_COMBINATION_EXPLOSION_CHECK
3768# define CEC_THRES_NUM_BIG_REPEAT 512
3769# define CEC_INFINITE_NUM 0x7fffffff
3771# define CEC_IN_INFINITE_REPEAT (1<<0)
3772# define CEC_IN_FINITE_REPEAT (1<<1)
3773# define CEC_CONT_BIG_REPEAT (1<<2)
3776setup_comb_exp_check(
Node* node,
int state,
ScanEnv* env)
3786 r = setup_comb_exp_check(NCAR(node), r, env);
3787 }
while (r >= 0 && IS_NOT_NULL(node = NCDR(node)));
3795 ret = setup_comb_exp_check(NCAR(node), state, env);
3797 }
while (ret >= 0 && IS_NOT_NULL(node = NCDR(node)));
3803 int child_state = state;
3806 Node* target = qn->target;
3809 if (! IS_REPEAT_INFINITE(qn->upper)) {
3810 if (qn->upper > 1) {
3812 child_state |= CEC_IN_FINITE_REPEAT;
3815 if (env->backrefed_mem == 0) {
3816 if (NTYPE(qn->target) == NT_ENCLOSE) {
3818 if (en->type == ENCLOSE_MEMORY) {
3819 if (NTYPE(en->target) == NT_QTFR) {
3821 if (IS_REPEAT_INFINITE(q->upper)
3822 && q->greedy == qn->greedy) {
3823 qn->upper = (qn->lower == 0 ? 1 : qn->lower);
3825 child_state = state;
3834 if (state & CEC_IN_FINITE_REPEAT) {
3835 qn->comb_exp_check_num = -1;
3838 if (IS_REPEAT_INFINITE(qn->upper)) {
3839 var_num = CEC_INFINITE_NUM;
3840 child_state |= CEC_IN_INFINITE_REPEAT;
3843 var_num = qn->upper - qn->lower;
3846 if (var_num >= CEC_THRES_NUM_BIG_REPEAT)
3847 add_state |= CEC_CONT_BIG_REPEAT;
3849 if (((state & CEC_IN_INFINITE_REPEAT) != 0 && var_num != 0) ||
3850 ((state & CEC_CONT_BIG_REPEAT) != 0 &&
3851 var_num >= CEC_THRES_NUM_BIG_REPEAT)) {
3852 if (qn->comb_exp_check_num == 0) {
3853 env->num_comb_exp_check++;
3854 qn->comb_exp_check_num = env->num_comb_exp_check;
3855 if (env->curr_max_regnum > env->comb_exp_max_regnum)
3856 env->comb_exp_max_regnum = env->curr_max_regnum;
3861 r = setup_comb_exp_check(target, child_state, env);
3871 case ENCLOSE_MEMORY:
3873 if (env->curr_max_regnum < en->regnum)
3874 env->curr_max_regnum = en->regnum;
3876 r = setup_comb_exp_check(en->target, state, env);
3881 r = setup_comb_exp_check(en->target, state, env);
3887# ifdef USE_SUBEXP_CALL
3889 if (IS_CALL_RECURSION(NCALL(node)))
3890 env->has_recursion = 1;
3892 r = setup_comb_exp_check(NCALL(node)->target, state, env);
3923 Node* prev = NULL_NODE;
3925 r = setup_tree(NCAR(node), reg, state, env);
3926 if (IS_NOT_NULL(prev) && r == 0) {
3927 r = next_setup(prev, NCAR(node), reg);
3930 }
while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
3936 r = setup_tree(NCAR(node), reg, (state | IN_ALT), env);
3937 }
while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
3944 if (IS_IGNORECASE(reg->options) && !NSTRING_IS_RAW(node)) {
3945 r = expand_case_fold_string(node, reg, state);
3953#ifdef USE_SUBEXP_CALL
3962 Node** nodes = SCANENV_MEM_NODES(env);
3965 for (i = 0; i < br->back_num; i++) {
3966 if (p[i] > env->num_mem)
return ONIGERR_INVALID_BACKREF;
3967 BIT_STATUS_ON_AT(env->backrefed_mem, p[i]);
3968 BIT_STATUS_ON_AT(env->bt_mem_start, p[i]);
3969#ifdef USE_BACKREF_WITH_LEVEL
3970 if (IS_BACKREF_NEST_LEVEL(br)) {
3971 BIT_STATUS_ON_AT(env->bt_mem_end, p[i]);
3974 SET_ENCLOSE_STATUS(nodes[p[i]], NST_MEM_BACKREFED);
3983 Node* target = qn->target;
3985 if ((state & IN_REPEAT) != 0) {
3986 qn->state |= NST_IN_REPEAT;
3989 if (IS_REPEAT_INFINITE(qn->upper) || qn->upper >= 1) {
3990 r = get_min_match_length(target, &d, env);
3993 qn->target_empty_info = NQ_TARGET_IS_EMPTY;
3994#ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
3995 r = quantifiers_memory_node_info(target);
3998 qn->target_empty_info = r;
4002 r = get_max_match_length(target, &d, env);
4003 if (r == 0 && d == 0) {
4006 if (qn->lower > 1) qn->lower = 1;
4007 if (NTYPE(target) == NT_STR) {
4008 qn->upper = qn->lower = 0;
4016 if (qn->lower != qn->upper)
4017 state |= IN_VAR_REPEAT;
4018 r = setup_tree(target, reg, state, env);
4022#define EXPAND_STRING_MAX_LENGTH 100
4023 if (NTYPE(target) == NT_STR) {
4024 if (qn->lower > 1) {
4025 int i, n = qn->lower;
4026 OnigDistance
len = NSTRING_LEN(target);
4030 np = onig_node_new_str(sn->s, sn->end);
4031 if (IS_NULL(np))
return ONIGERR_MEMORY;
4032 NSTR(np)->flag = sn->flag;
4034 for (i = 1; i < n && (i+1) *
len <= EXPAND_STRING_MAX_LENGTH; i++) {
4035 r = onig_node_str_cat(np, sn->s, sn->end);
4041 if (i < qn->upper || IS_REPEAT_INFINITE(qn->upper)) {
4045 if (! IS_REPEAT_INFINITE(qn->upper))
4048 np1 = onig_node_new_list(np, NULL);
4051 return ONIGERR_MEMORY;
4053 swap_node(np1, node);
4054 np2 = onig_node_list_add(node, np1);
4056 onig_node_free(np1);
4057 return ONIGERR_MEMORY;
4061 swap_node(np, node);
4068#ifdef USE_OP_PUSH_OR_JUMP_EXACT
4069 if (qn->greedy && (qn->target_empty_info != 0)) {
4070 if (NTYPE(target) == NT_QTFR) {
4072 if (IS_NOT_NULL(tqn->head_exact)) {
4073 qn->head_exact = tqn->head_exact;
4074 tqn->head_exact = NULL;
4078 qn->head_exact = get_head_value_node(qn->target, 1, reg);
4090 case ENCLOSE_OPTION:
4092 OnigOptionType options = reg->options;
4093 reg->options = NENCLOSE(node)->option;
4094 r = setup_tree(NENCLOSE(node)->target, reg, state, env);
4095 reg->options = options;
4099 case ENCLOSE_MEMORY:
4100 if ((state & (IN_ALT | IN_NOT | IN_VAR_REPEAT | IN_CALL)) != 0) {
4101 BIT_STATUS_ON_AT(env->bt_mem_start, en->regnum);
4104 if (IS_ENCLOSE_CALLED(en))
4106 if (IS_ENCLOSE_RECURSION(en))
4107 state |= IN_RECCALL;
4108 else if ((state & IN_RECCALL) != 0)
4109 SET_CALL_RECURSION(node);
4110 r = setup_tree(en->target, reg, state, env);
4113 case ENCLOSE_STOP_BACKTRACK:
4115 Node* target = en->target;
4116 r = setup_tree(target, reg, state, env);
4117 if (NTYPE(target) == NT_QTFR) {
4119 if (IS_REPEAT_INFINITE(tqn->upper) && tqn->lower <= 1 &&
4121 int qtype = NTYPE(tqn->target);
4122 if (IS_NODE_TYPE_SIMPLE(qtype))
4123 SET_ENCLOSE_STATUS(node, NST_STOP_BT_SIMPLE_REPEAT);
4129 case ENCLOSE_CONDITION:
4130#ifdef USE_NAMED_GROUP
4131 if (! IS_ENCLOSE_NAME_REF(NENCLOSE(node)) &&
4132 env->num_named > 0 &&
4133 IS_SYNTAX_BV(env->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
4134 !ONIG_IS_OPTION_ON(env->option, ONIG_OPTION_CAPTURE_GROUP)) {
4135 return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
4138 if (NENCLOSE(node)->regnum > env->num_mem)
4139 return ONIGERR_INVALID_BACKREF;
4140 r = setup_tree(NENCLOSE(node)->target, reg, state, env);
4143 case ENCLOSE_ABSENT:
4144 r = setup_tree(NENCLOSE(node)->target, reg, state, env);
4155 case ANCHOR_PREC_READ:
4156 r = setup_tree(an->target, reg, state, env);
4158 case ANCHOR_PREC_READ_NOT:
4159 r = setup_tree(an->target, reg, (state | IN_NOT), env);
4163#define ALLOWED_TYPE_IN_LB \
4164 ( BIT_NT_LIST | BIT_NT_ALT | BIT_NT_STR | BIT_NT_CCLASS | BIT_NT_CTYPE | \
4165 BIT_NT_CANY | BIT_NT_ANCHOR | BIT_NT_ENCLOSE | BIT_NT_QTFR | BIT_NT_CALL )
4167#define ALLOWED_ENCLOSE_IN_LB ( ENCLOSE_MEMORY | ENCLOSE_OPTION )
4168#define ALLOWED_ENCLOSE_IN_LB_NOT ENCLOSE_OPTION
4170#define ALLOWED_ANCHOR_IN_LB \
4171( ANCHOR_LOOK_BEHIND | ANCHOR_LOOK_BEHIND_NOT | ANCHOR_BEGIN_LINE | \
4172 ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION | ANCHOR_KEEP | \
4173 ANCHOR_WORD_BOUND | ANCHOR_NOT_WORD_BOUND | \
4174 ANCHOR_WORD_BEGIN | ANCHOR_WORD_END )
4175#define ALLOWED_ANCHOR_IN_LB_NOT \
4176( ANCHOR_LOOK_BEHIND | ANCHOR_LOOK_BEHIND_NOT | ANCHOR_BEGIN_LINE | \
4177 ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION | ANCHOR_KEEP | \
4178 ANCHOR_WORD_BOUND | ANCHOR_NOT_WORD_BOUND | \
4179 ANCHOR_WORD_BEGIN | ANCHOR_WORD_END )
4181 case ANCHOR_LOOK_BEHIND:
4183 r = check_type_tree(an->target, ALLOWED_TYPE_IN_LB,
4184 ALLOWED_ENCLOSE_IN_LB, ALLOWED_ANCHOR_IN_LB);
4185 if (r < 0)
return r;
4186 if (r > 0)
return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
4187 if (NTYPE(node) != NT_ANCHOR)
goto restart;
4188 r = setup_tree(an->target, reg, (state | IN_LOOK_BEHIND), env);
4189 if (r != 0)
return r;
4190 r = setup_look_behind(node, reg, env);
4194 case ANCHOR_LOOK_BEHIND_NOT:
4196 r = check_type_tree(an->target, ALLOWED_TYPE_IN_LB,
4197 ALLOWED_ENCLOSE_IN_LB_NOT, ALLOWED_ANCHOR_IN_LB_NOT);
4198 if (r < 0)
return r;
4199 if (r > 0)
return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
4200 if (NTYPE(node) != NT_ANCHOR)
goto restart;
4201 r = setup_tree(an->target, reg, (state | IN_NOT | IN_LOOK_BEHIND),
4203 if (r != 0)
return r;
4204 r = setup_look_behind(node, reg, env);
4220set_bm_skip(UChar* s, UChar* end,
regex_t* reg,
4221 UChar skip[],
int ignore_case)
4223 OnigDistance i,
len;
4224 int clen, flen, n, j, k;
4225 UChar *p, buf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
4230 if (
len >= ONIG_CHAR_TABLE_SIZE) {
4232 return ONIGERR_TYPE_BUG;
4236 for (i = 0; i <
len; i += clen) {
4238 n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag,
4240 clen = enclen(enc, p, end);
4242 clen = (int )(end - p);
4244 for (j = 0; j < n; j++) {
4245 if ((items[j].code_len != 1) || (items[j].byte_len != clen)) {
4250 flen = ONIGENC_CODE_TO_MBC(enc, items[j].code[0], buf);
4262 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
4263 skip[i] = (UChar )(
len + 1);
4265 for (i = 0; i <
len; i += clen) {
4268 n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag,
4270 clen = enclen(enc, p, end);
4272 clen = (int )(end - p);
4274 for (j = 0; j < clen; j++) {
4275 skip[s[i + j]] = (UChar )(
len - i - j);
4276 for (k = 0; k < n; k++) {
4277 ONIGENC_CODE_TO_MBC(enc, items[k].code[0], buf);
4278 skip[buf[j]] = (UChar )(
len - i - j);
4294 OnigOptionType options;
4295 OnigCaseFoldType case_fold_flag;
4311 UChar s[OPT_EXACT_MAXLEN];
4319 UChar map[ONIG_CHAR_TABLE_SIZE];
4337 static const short int ByteValTable[] = {
4338 5, 1, 1, 1, 1, 1, 1, 1, 1, 10, 10, 1, 1, 10, 1, 1,
4339 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
4340 12, 4, 7, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5,
4341 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 5, 5,
4342 5, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
4343 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 6, 5, 5, 5,
4344 5, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
4345 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 1
4348 if (i < numberof(ByteValTable)) {
4349 if (i == 0 && ONIGENC_MBC_MINLEN(enc) > 1)
4352 return (
int )ByteValTable[i];
4362 static const short int dist_vals[] = {
4363 1000, 500, 333, 250, 200, 167, 143, 125, 111, 100,
4364 91, 83, 77, 71, 67, 63, 59, 56, 53, 50,
4365 48, 45, 43, 42, 40, 38, 37, 36, 34, 33,
4366 32, 31, 30, 29, 29, 28, 27, 26, 26, 25,
4367 24, 24, 23, 23, 22, 22, 21, 21, 20, 20,
4368 20, 19, 19, 19, 18, 18, 18, 17, 17, 17,
4369 16, 16, 16, 16, 15, 15, 15, 15, 14, 14,
4370 14, 14, 14, 14, 13, 13, 13, 13, 13, 13,
4371 12, 12, 12, 12, 12, 12, 11, 11, 11, 11,
4372 11, 11, 11, 11, 11, 10, 10, 10, 10, 10
4377 if (mm->max == ONIG_INFINITE_DISTANCE)
return 0;
4379 d = mm->max - mm->min;
4380 if (d < numberof(dist_vals))
4382 return (
int )dist_vals[d];
4390 if (v2 <= 0)
return -1;
4391 if (v1 <= 0)
return 1;
4393 v1 *= distance_value(d1);
4394 v2 *= distance_value(d2);
4396 if (v2 > v1)
return 1;
4397 if (v2 < v1)
return -1;
4399 if (d2->min < d1->min)
return 1;
4400 if (d2->min > d1->min)
return -1;
4407 return (a->min == b->min && a->max == b->max) ? 1 : 0;
4412set_mml(
MinMaxLen* mml, OnigDistance min, OnigDistance max)
4421 mml->min = mml->max = 0;
4427 to->min = from->min;
4428 to->max = from->max;
4434 to->min = distance_add(to->min, from->min);
4435 to->max = distance_add(to->max, from->max);
4442 to->min = distance_add(to->min,
len);
4443 to->max = distance_add(to->max,
len);
4450 if (to->min > from->min) to->min = from->min;
4451 if (to->max < from->max) to->max = from->max;
4463 anc->left_anchor = 0;
4464 anc->right_anchor = 0;
4475 OnigDistance left_len, OnigDistance right_len)
4477 clear_opt_anc_info(to);
4479 to->left_anchor = left->left_anchor;
4480 if (left_len == 0) {
4481 to->left_anchor |= right->left_anchor;
4484 to->right_anchor = right->right_anchor;
4485 if (right_len == 0) {
4486 to->right_anchor |= left->right_anchor;
4489 to->right_anchor |= (left->right_anchor & ANCHOR_PREC_READ_NOT);
4494is_left_anchor(
int anc)
4496 if (anc == ANCHOR_END_BUF || anc == ANCHOR_SEMI_END_BUF ||
4497 anc == ANCHOR_END_LINE || anc == ANCHOR_PREC_READ ||
4498 anc == ANCHOR_PREC_READ_NOT)
4507 if ((to->left_anchor & anc) != 0)
return 1;
4509 return ((to->right_anchor & anc) != 0 ? 1 : 0);
4515 if (is_left_anchor(anc))
4516 to->left_anchor |= anc;
4518 to->right_anchor |= anc;
4524 if (is_left_anchor(anc))
4525 to->left_anchor &= ~anc;
4527 to->right_anchor &= ~anc;
4533 to->left_anchor &= add->left_anchor;
4534 to->right_anchor &= add->right_anchor;
4540 return (ex->len >= OPT_EXACT_MAXLEN ? 1 : 0);
4546 clear_mml(&ex->mmd);
4547 clear_opt_anc_info(&ex->anc);
4549 ex->ignore_case = -1;
4567 if (to->ignore_case < 0)
4568 to->ignore_case = add->ignore_case;
4569 else if (to->ignore_case != add->ignore_case)
4574 for (i = to->len; p < end; ) {
4575 len = enclen(enc, p, end);
4576 if (i +
len > OPT_EXACT_MAXLEN)
break;
4577 for (j = 0; j <
len && p < end; j++)
4582 to->reach_end = (p == end ? add->reach_end : 0);
4584 concat_opt_anc_info(&tanc, &to->anc, &add->anc, 1, 1);
4585 if (! to->reach_end) tanc.right_anchor = 0;
4586 copy_opt_anc_info(&to->anc, &tanc);
4590concat_opt_exact_info_str(
OptExactInfo* to, UChar* s, UChar* end,
4596 for (i = to->len, p = s; p < end && i < OPT_EXACT_MAXLEN; ) {
4597 len = enclen(enc, p, end);
4598 if (i +
len > OPT_EXACT_MAXLEN)
break;
4599 for (j = 0; j <
len && p < end; j++)
4611 if (add->len == 0 || to->len == 0) {
4612 clear_opt_exact_info(to);
4616 if (! is_equal_mml(&to->mmd, &add->mmd)) {
4617 clear_opt_exact_info(to);
4621 for (i = 0; i < to->len && i < add->len; ) {
4622 if (to->s[i] != add->s[i])
break;
4623 len = enclen(env->enc, to->s + i, to->s + to->len);
4625 for (j = 1; j <
len; j++) {
4626 if (to->s[i+j] != add->s[i+j])
break;
4632 if (! add->reach_end || i < add->
len || i < to->
len) {
4636 if (to->ignore_case < 0)
4637 to->ignore_case = add->ignore_case;
4638 else if (add->ignore_case >= 0)
4639 to->ignore_case |= add->ignore_case;
4641 alt_merge_opt_anc_info(&to->anc, &add->anc);
4642 if (! to->reach_end) to->anc.right_anchor = 0;
4657 copy_opt_exact_info(now, alt);
4660 else if (v1 <= 2 && v2 <= 2) {
4662 v2 = map_position_value(enc, now->s[0]);
4663 v1 = map_position_value(enc, alt->s[0]);
4665 if (now->len > 1) v1 += 5;
4666 if (alt->len > 1) v2 += 5;
4669 if (now->ignore_case <= 0) v1 *= 2;
4670 if (alt->ignore_case <= 0) v2 *= 2;
4672 if (comp_distance_value(&now->mmd, &alt->mmd, v1, v2) > 0)
4673 copy_opt_exact_info(now, alt);
4682 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4683 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4684 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4685 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4686 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4687 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4688 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4689 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4690 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4691 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4692 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4693 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4694 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4695 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4696 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4697 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
4701 xmemcpy(map, &clean_info,
sizeof(
OptMapInfo));
4713 if (map->map[c] == 0) {
4715 map->value += map_position_value(enc, c);
4720add_char_amb_opt_map_info(
OptMapInfo* map, UChar* p, UChar* end,
4724 UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
4727 add_char_opt_map_info(map, p[0], enc);
4729 case_fold_flag = DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag);
4730 n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, case_fold_flag, p, end, items);
4731 if (n < 0)
return n;
4733 for (i = 0; i < n; i++) {
4734 ONIGENC_CODE_TO_MBC(enc, items[i].code[0], buf);
4735 add_char_opt_map_info(map, buf[0], enc);
4744 const int z = 1<<15;
4748 if (alt->value == 0) return ;
4749 if (now->value == 0) {
4750 copy_opt_map_info(now, alt);
4754 v1 = z / now->value;
4755 v2 = z / alt->value;
4756 if (comp_distance_value(&now->mmd, &alt->mmd, v1, v2) > 0)
4757 copy_opt_map_info(now, alt);
4763#define COMP_EM_BASE 20
4766 if (m->value <= 0)
return -1;
4768 ve = COMP_EM_BASE * e->len * (e->ignore_case > 0 ? 1 : 2);
4769 vm = COMP_EM_BASE * 5 * 2 / m->value;
4770 return comp_distance_value(&e->mmd, &m->mmd, ve, vm);
4779 if (to->value == 0) return ;
4780 if (add->value == 0 || to->mmd.max < add->mmd.min) {
4781 clear_opt_map_info(to);
4785 alt_merge_mml(&to->mmd, &add->mmd);
4788 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) {
4793 val += map_position_value(enc, i);
4797 alt_merge_opt_anc_info(&to->anc, &add->anc);
4803 copy_mml(&(opt->exb.mmd), mmd);
4804 copy_mml(&(opt->expr.mmd), mmd);
4805 copy_mml(&(opt->map.mmd), mmd);
4811 clear_mml(&opt->len);
4812 clear_opt_anc_info(&opt->anc);
4813 clear_opt_exact_info(&opt->exb);
4814 clear_opt_exact_info(&opt->exm);
4815 clear_opt_exact_info(&opt->expr);
4816 clear_opt_map_info(&opt->map);
4828 int exb_reach, exm_reach;
4831 concat_opt_anc_info(&tanc, &to->anc, &add->anc, to->len.max, add->len.max);
4832 copy_opt_anc_info(&to->anc, &tanc);
4834 if (add->exb.len > 0 && to->len.max == 0) {
4835 concat_opt_anc_info(&tanc, &to->anc, &add->exb.anc,
4836 to->len.max, add->len.max);
4837 copy_opt_anc_info(&add->exb.anc, &tanc);
4840 if (add->map.value > 0 && to->len.max == 0) {
4841 if (add->map.mmd.max == 0)
4842 add->map.anc.left_anchor |= to->anc.left_anchor;
4845 exb_reach = to->exb.reach_end;
4846 exm_reach = to->exm.reach_end;
4848 if (add->len.max != 0)
4849 to->exb.reach_end = to->exm.reach_end = 0;
4851 if (add->exb.len > 0) {
4853 concat_opt_exact_info(&to->exb, &add->exb, enc);
4854 clear_opt_exact_info(&add->exb);
4856 else if (exm_reach) {
4857 concat_opt_exact_info(&to->exm, &add->exb, enc);
4858 clear_opt_exact_info(&add->exb);
4861 select_opt_exact_info(enc, &to->exm, &add->exb);
4862 select_opt_exact_info(enc, &to->exm, &add->exm);
4864 if (to->expr.len > 0) {
4865 if (add->len.max > 0) {
4866 if (to->expr.len > (
int )add->len.max)
4867 to->expr.len = (int )add->len.max;
4869 if (to->expr.mmd.max == 0)
4870 select_opt_exact_info(enc, &to->exb, &to->expr);
4872 select_opt_exact_info(enc, &to->exm, &to->expr);
4875 else if (add->expr.len > 0) {
4876 copy_opt_exact_info(&to->expr, &add->expr);
4879 select_opt_map_info(&to->map, &add->map);
4881 add_mml(&to->len, &add->len);
4887 alt_merge_opt_anc_info (&to->anc, &add->anc);
4888 alt_merge_opt_exact_info(&to->exb, &add->exb, env);
4889 alt_merge_opt_exact_info(&to->exm, &add->exm, env);
4890 alt_merge_opt_exact_info(&to->expr, &add->expr, env);
4891 alt_merge_opt_map_info(env->enc, &to->map, &add->map);
4893 alt_merge_mml(&to->len, &add->len);
4897#define MAX_NODE_OPT_INFO_REF_COUNT 5
4905 clear_node_opt_info(opt);
4906 set_bound_node_opt_info(opt, &env->mmd);
4916 copy_opt_env(&nenv, env);
4918 r = optimize_node_left(NCAR(nd), &nopt, &nenv);
4920 add_mml(&nenv.mmd, &nopt.len);
4921 concat_left_node_opt_info(env->enc, opt, &nopt);
4923 }
while (r == 0 && IS_NOT_NULL(nd = NCDR(nd)));
4933 r = optimize_node_left(NCAR(nd), &nopt, env);
4935 if (nd == node) copy_node_opt_info(opt, &nopt);
4936 else alt_merge_node_opt_info(opt, &nopt, env);
4938 }
while ((r == 0) && IS_NOT_NULL(nd = NCDR(nd)));
4945 OnigDistance slen = sn->end - sn->s;
4946 int is_raw = NSTRING_IS_RAW(node);
4948 if (! NSTRING_IS_AMBIG(node)) {
4949 concat_opt_exact_info_str(&opt->exb, sn->s, sn->end,
4951 opt->exb.ignore_case = 0;
4953 add_char_opt_map_info(&opt->map, *(sn->s), env->enc);
4955 set_mml(&opt->len, slen, slen);
4960 if (NSTRING_IS_DONT_GET_OPT_INFO(node)) {
4961 int n = onigenc_strlen(env->enc, sn->s, sn->end);
4962 max = (OnigDistance )ONIGENC_MBC_MAXLEN_DIST(env->enc) * (OnigDistance)n;
4965 concat_opt_exact_info_str(&opt->exb, sn->s, sn->end,
4967 opt->exb.ignore_case = 1;
4970 r = add_char_amb_opt_map_info(&opt->map, sn->s, sn->end,
4971 env->enc, env->case_fold_flag);
4978 set_mml(&opt->len, slen, max);
4981 if ((OnigDistance )opt->exb.len == slen)
4982 opt->exb.reach_end = 1;
4993 if (IS_NOT_NULL(cc->mbuf) || IS_NCCLASS_NOT(cc)) {
4994 OnigDistance min = ONIGENC_MBC_MINLEN(env->enc);
4995 OnigDistance max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
4997 set_mml(&opt->len, min, max);
5000 for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
5001 z = BITSET_AT(cc->bs, i);
5002 if ((z && !IS_NCCLASS_NOT(cc)) || (!z && IS_NCCLASS_NOT(cc))) {
5003 add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
5006 set_mml(&opt->len, 1, 1);
5016 max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
5021 maxcode = NCTYPE(node)->ascii_range ? 0x80 : SINGLE_BYTE_SIZE;
5022 switch (NCTYPE(node)->ctype) {
5023 case ONIGENC_CTYPE_WORD:
5024 if (NCTYPE(node)->not != 0) {
5025 for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
5026 if (! ONIGENC_IS_CODE_WORD(env->enc, i) || i >= maxcode) {
5027 add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
5032 for (i = 0; i < maxcode; i++) {
5033 if (ONIGENC_IS_CODE_WORD(env->enc, i)) {
5034 add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
5042 min = ONIGENC_MBC_MINLEN(env->enc);
5044 set_mml(&opt->len, min, max);
5050 OnigDistance min = ONIGENC_MBC_MINLEN(env->enc);
5051 OnigDistance max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
5052 set_mml(&opt->len, min, max);
5057 switch (NANCHOR(node)->type) {
5058 case ANCHOR_BEGIN_BUF:
5059 case ANCHOR_BEGIN_POSITION:
5060 case ANCHOR_BEGIN_LINE:
5061 case ANCHOR_END_BUF:
5062 case ANCHOR_SEMI_END_BUF:
5063 case ANCHOR_END_LINE:
5064 case ANCHOR_LOOK_BEHIND:
5065 case ANCHOR_PREC_READ_NOT:
5066 add_opt_anc_info(&opt->anc, NANCHOR(node)->type);
5069 case ANCHOR_PREC_READ:
5073 r = optimize_node_left(NANCHOR(node)->target, &nopt, env);
5075 if (nopt.exb.len > 0)
5076 copy_opt_exact_info(&opt->expr, &nopt.exb);
5077 else if (nopt.exm.len > 0)
5078 copy_opt_exact_info(&opt->expr, &nopt.exm);
5080 opt->expr.reach_end = 0;
5082 if (nopt.map.value > 0)
5083 copy_opt_map_info(&opt->map, &nopt.map);
5088 case ANCHOR_LOOK_BEHIND_NOT:
5097 OnigDistance min, max, tmin, tmax;
5098 Node** nodes = SCANENV_MEM_NODES(env->scan_env);
5101 if (br->state & NST_RECURSION) {
5102 set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE);
5105 backs = BACKREFS_P(br);
5106 r = get_min_match_length(nodes[backs[0]], &min, env->scan_env);
5108 r = get_max_match_length(nodes[backs[0]], &max, env->scan_env);
5110 for (i = 1; i < br->back_num; i++) {
5111 r = get_min_match_length(nodes[backs[i]], &tmin, env->scan_env);
5113 r = get_max_match_length(nodes[backs[i]], &tmax, env->scan_env);
5115 if (min > tmin) min = tmin;
5116 if (max < tmax) max = tmax;
5118 if (r == 0) set_mml(&opt->len, min, max);
5122#ifdef USE_SUBEXP_CALL
5124 if (IS_CALL_RECURSION(NCALL(node)))
5125 set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE);
5127 OnigOptionType save = env->options;
5128 env->options = NENCLOSE(NCALL(node)->target)->option;
5129 r = optimize_node_left(NCALL(node)->target, opt, env);
5130 env->options = save;
5138 OnigDistance min, max;
5142 r = optimize_node_left(qn->target, &nopt, env);
5145 if (qn->lower == 0 && IS_REPEAT_INFINITE(qn->upper)) {
5146 if (env->mmd.max == 0 &&
5147 NTYPE(qn->target) == NT_CANY && qn->greedy) {
5148 if (IS_MULTILINE(env->options))
5150 add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_ML);
5152 add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR);
5156 if (qn->lower > 0) {
5157 copy_node_opt_info(opt, &nopt);
5158 if (nopt.exb.len > 0) {
5159 if (nopt.exb.reach_end) {
5160 for (i = 2; i <= qn->lower &&
5161 ! is_full_opt_exact_info(&opt->exb); i++) {
5162 concat_opt_exact_info(&opt->exb, &nopt.exb, env->enc);
5164 if (i < qn->lower) {
5165 opt->exb.reach_end = 0;
5170 if (qn->lower != qn->upper) {
5171 opt->exb.reach_end = 0;
5172 opt->exm.reach_end = 0;
5175 opt->exm.reach_end = 0;
5179 min = distance_multiply(nopt.len.min, qn->lower);
5180 if (IS_REPEAT_INFINITE(qn->upper))
5181 max = (nopt.len.max > 0 ? ONIG_INFINITE_DISTANCE : 0);
5183 max = distance_multiply(nopt.len.max, qn->upper);
5185 set_mml(&opt->len, min, max);
5194 case ENCLOSE_OPTION:
5196 OnigOptionType save = env->options;
5198 env->options = en->option;
5199 r = optimize_node_left(en->target, opt, env);
5200 env->options = save;
5204 case ENCLOSE_MEMORY:
5205#ifdef USE_SUBEXP_CALL
5207 if (en->opt_count > MAX_NODE_OPT_INFO_REF_COUNT) {
5208 OnigDistance min, max;
5211 max = ONIG_INFINITE_DISTANCE;
5212 if (IS_ENCLOSE_MIN_FIXED(en)) min = en->min_len;
5213 if (IS_ENCLOSE_MAX_FIXED(en)) max = en->max_len;
5214 set_mml(&opt->len, min, max);
5219 r = optimize_node_left(en->target, opt, env);
5221 if (is_set_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_MASK)) {
5222 if (BIT_STATUS_AT(env->scan_env->backrefed_mem, en->regnum))
5223 remove_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_MASK);
5228 case ENCLOSE_STOP_BACKTRACK:
5229 case ENCLOSE_CONDITION:
5230 r = optimize_node_left(en->target, opt, env);
5233 case ENCLOSE_ABSENT:
5234 set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE);
5242 fprintf(stderr,
"optimize_node_left: undefined node type %d\n",
5245 r = ONIGERR_TYPE_BUG;
5257 if (e->len == 0)
return 0;
5259 reg->exact = (UChar* )
xmalloc(e->len);
5260 CHECK_NULL_RETURN_MEMERR(reg->exact);
5261 xmemcpy(reg->exact, e->s, e->len);
5262 reg->exact_end = reg->exact + e->len;
5265 ONIGENC_IS_ALLOWED_REVERSE_MATCH(reg->enc, reg->exact, reg->exact_end);
5267 if (e->ignore_case > 0) {
5268 if (e->len >= 3 || (e->len >= 2 && allow_reverse)) {
5269 int orig_len = e->len;
5270 e->len = set_bm_skip(reg->exact, reg->exact_end, reg,
5273 reg->exact_end = reg->exact + e->len;
5274 reg->optimize = (allow_reverse != 0
5275 ? ONIG_OPTIMIZE_EXACT_BM_IC : ONIG_OPTIMIZE_EXACT_BM_NOT_REV_IC);
5285 reg->optimize = ONIG_OPTIMIZE_EXACT_IC;
5289 reg->optimize = ONIG_OPTIMIZE_EXACT_IC;
5293 if (e->len >= 3 || (e->len >= 2 && allow_reverse)) {
5294 set_bm_skip(reg->exact, reg->exact_end, reg,
5296 reg->optimize = (allow_reverse != 0
5297 ? ONIG_OPTIMIZE_EXACT_BM : ONIG_OPTIMIZE_EXACT_BM_NOT_REV);
5300 reg->optimize = ONIG_OPTIMIZE_EXACT;
5304 reg->dmin = e->mmd.min;
5305 reg->dmax = e->mmd.max;
5307 if (reg->dmin != ONIG_INFINITE_DISTANCE) {
5308 reg->threshold_len = (int )(reg->dmin + (reg->exact_end - reg->exact));
5319 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
5320 reg->map[i] = m->map[i];
5322 reg->optimize = ONIG_OPTIMIZE_MAP;
5323 reg->dmin = m->mmd.min;
5324 reg->dmax = m->mmd.max;
5326 if (reg->dmin != ONIG_INFINITE_DISTANCE) {
5327 reg->threshold_len = (int )(reg->dmin + 1);
5334 reg->sub_anchor |= anc->left_anchor & ANCHOR_BEGIN_LINE;
5335 reg->sub_anchor |= anc->right_anchor & ANCHOR_END_LINE;
5338#if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)
5339static void print_optimize_info(
FILE* f,
regex_t* reg);
5351 env.options = reg->options;
5352 env.case_fold_flag = reg->case_fold_flag;
5353 env.scan_env = scan_env;
5354 clear_mml(&env.mmd);
5356 r = optimize_node_left(node, &opt, &env);
5359 reg->anchor = opt.anc.left_anchor & (ANCHOR_BEGIN_BUF |
5360 ANCHOR_BEGIN_POSITION | ANCHOR_ANYCHAR_STAR | ANCHOR_ANYCHAR_STAR_ML |
5361 ANCHOR_LOOK_BEHIND);
5363 if ((opt.anc.left_anchor & (ANCHOR_LOOK_BEHIND | ANCHOR_PREC_READ_NOT)) != 0)
5364 reg->anchor &= ~ANCHOR_ANYCHAR_STAR_ML;
5366 reg->anchor |= opt.anc.right_anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF |
5367 ANCHOR_PREC_READ_NOT);
5369 if (reg->anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF)) {
5370 reg->anchor_dmin = opt.len.min;
5371 reg->anchor_dmax = opt.len.max;
5374 if (opt.exb.len > 0 || opt.exm.len > 0) {
5375 select_opt_exact_info(reg->enc, &opt.exb, &opt.exm);
5376 if (opt.map.value > 0 &&
5377 comp_opt_exact_or_map_info(&opt.exb, &opt.map) > 0) {
5381 r = set_optimize_exact_info(reg, &opt.exb);
5382 set_sub_anchor(reg, &opt.exb.anc);
5385 else if (opt.map.value > 0) {
5387 set_optimize_map_info(reg, &opt.map);
5388 set_sub_anchor(reg, &opt.map.anc);
5391 reg->sub_anchor |= opt.anc.left_anchor & ANCHOR_BEGIN_LINE;
5392 if (opt.len.max == 0)
5393 reg->sub_anchor |= opt.anc.right_anchor & ANCHOR_END_LINE;
5396#if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)
5397 print_optimize_info(stderr, reg);
5403clear_optimize_info(
regex_t* reg)
5405 reg->optimize = ONIG_OPTIMIZE_NONE;
5407 reg->anchor_dmin = 0;
5408 reg->anchor_dmax = 0;
5409 reg->sub_anchor = 0;
5410 reg->exact_end = (UChar* )NULL;
5411 reg->threshold_len = 0;
5413 reg->exact = (UChar* )NULL;
5419 const UChar *s,
const UChar *end)
5421 fprintf(fp,
"\nPATTERN: /");
5423 if (ONIGENC_MBC_MINLEN(enc) > 1) {
5429 code = ONIGENC_MBC_TO_CODE(enc, p, end);
5431 fprintf(fp,
" 0x%04x ", (
int )code);
5434 fputc((
int )code, fp);
5437 p += enclen(enc, p, end);
5442 fputc((
int )*s, fp);
5447 fprintf(fp,
"/ (%s)\n", enc->name);
5451#if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)
5453print_distance_range(
FILE* f, OnigDistance a, OnigDistance b)
5455 if (a == ONIG_INFINITE_DISTANCE)
5458 fprintf(f,
"(%"PRIuPTR
")", a);
5462 if (b == ONIG_INFINITE_DISTANCE)
5465 fprintf(f,
"(%"PRIuPTR
")", b);
5469print_anchor(
FILE* f,
int anchor)
5475 if (anchor & ANCHOR_BEGIN_BUF) {
5476 fprintf(f,
"begin-buf");
5479 if (anchor & ANCHOR_BEGIN_LINE) {
5480 if (q) fprintf(f,
", ");
5482 fprintf(f,
"begin-line");
5484 if (anchor & ANCHOR_BEGIN_POSITION) {
5485 if (q) fprintf(f,
", ");
5487 fprintf(f,
"begin-pos");
5489 if (anchor & ANCHOR_END_BUF) {
5490 if (q) fprintf(f,
", ");
5492 fprintf(f,
"end-buf");
5494 if (anchor & ANCHOR_SEMI_END_BUF) {
5495 if (q) fprintf(f,
", ");
5497 fprintf(f,
"semi-end-buf");
5499 if (anchor & ANCHOR_END_LINE) {
5500 if (q) fprintf(f,
", ");
5502 fprintf(f,
"end-line");
5504 if (anchor & ANCHOR_ANYCHAR_STAR) {
5505 if (q) fprintf(f,
", ");
5507 fprintf(f,
"anychar-star");
5509 if (anchor & ANCHOR_ANYCHAR_STAR_ML) {
5510 if (q) fprintf(f,
", ");
5511 fprintf(f,
"anychar-star-ml");
5520 static const char* on[] = {
"NONE",
"EXACT",
"EXACT_BM",
"EXACT_BM_NOT_REV",
5522 "EXACT_BM_IC",
"EXACT_BM_NOT_REV_IC" };
5524 fprintf(f,
"optimize: %s\n", on[reg->optimize]);
5525 fprintf(f,
" anchor: "); print_anchor(f, reg->anchor);
5526 if ((reg->anchor & ANCHOR_END_BUF_MASK) != 0)
5527 print_distance_range(f, reg->anchor_dmin, reg->anchor_dmax);
5530 if (reg->optimize) {
5531 fprintf(f,
" sub anchor: "); print_anchor(f, reg->sub_anchor);
5538 fprintf(f,
"exact: [");
5539 for (p = reg->exact; p < reg->exact_end; p++) {
5542 fprintf(f,
"]: length: %"PRIdPTR
"\n", (reg->exact_end - reg->exact));
5544 else if (reg->optimize & ONIG_OPTIMIZE_MAP) {
5547 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
5548 if (reg->map[i]) n++;
5550 fprintf(f,
"map: n=%d\n", n);
5554 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) {
5555 if (reg->map[i] != 0) {
5556 if (c > 0) fputs(
", ", f);
5558 if (ONIGENC_MBC_MAXLEN(reg->enc) == 1 &&
5559 ONIGENC_IS_CODE_PRINT(reg->enc, (OnigCodePoint )i))
5562 fprintf(f,
"%d", i);
5575 if (IS_NOT_NULL(reg)) {
5578 xfree(reg->repeat_range);
5579 onig_free(reg->chain);
5581#ifdef USE_NAMED_GROUP
5582 onig_names_free(reg);
5590 if (IS_NOT_NULL(reg)) {
5591 onig_free_body(reg);
5597dup_copy(
const void *ptr,
size_t size)
5600 if (IS_NOT_NULL(newptr)) {
5601 memcpy(newptr, ptr, size);
5609 if (IS_NOT_NULL(oreg)) {
5611 if (IS_NULL(reg))
return ONIGERR_MEMORY;
5615# define COPY_FAILED(mem, size) IS_NULL(reg->mem = dup_copy(reg->mem, size))
5617 if (IS_NOT_NULL(reg->exact)) {
5618 size_t exact_size = reg->exact_end - reg->exact;
5619 if (COPY_FAILED(exact, exact_size))
5621 (reg)->exact_end = (reg)->exact + exact_size;
5624 if (IS_NOT_NULL(reg->p)) {
5625 if (COPY_FAILED(p, reg->alloc))
5628 if (IS_NOT_NULL(reg->repeat_range)) {
5629 if (COPY_FAILED(repeat_range, reg->repeat_range_alloc *
sizeof(
OnigRepeatRange)))
5630 goto err_repeat_range;
5632 if (IS_NOT_NULL(reg->name_table)) {
5633 if (onig_names_copy(reg, oreg))
5634 goto err_name_table;
5636 if (IS_NOT_NULL(reg->chain)) {
5637 if (onig_reg_copy(®->chain, reg->chain))
5644 onig_names_free(reg);
5646 xfree(reg->repeat_range);
5653 return ONIGERR_MEMORY;
5660onig_memsize(
const regex_t *reg)
5662 size_t size =
sizeof(
regex_t);
5663 if (IS_NULL(reg))
return 0;
5664 if (IS_NOT_NULL(reg->p)) size += reg->alloc;
5665 if (IS_NOT_NULL(reg->exact)) size += reg->exact_end - reg->exact;
5666 if (IS_NOT_NULL(reg->repeat_range)) size += reg->repeat_range_alloc *
sizeof(
OnigRepeatRange);
5667 if (IS_NOT_NULL(reg->chain)) size += onig_memsize(reg->chain);
5675 size_t size =
sizeof(*regs);
5676 if (IS_NULL(regs))
return 0;
5677 size += regs->allocated * (
sizeof(*regs->beg) +
sizeof(*regs->end));
5682#define REGEX_TRANSFER(to,from) do {\
5683 onig_free_body(to);\
5684 xmemcpy(to, from, sizeof(regex_t));\
5692 REGEX_TRANSFER(to, from);
5696#ifdef ONIG_DEBUG_COMPILE
5697static void print_compiled_byte_code_list(
FILE* f,
regex_t* reg);
5699#ifdef ONIG_DEBUG_PARSE_TREE
5700static void print_tree(
FILE* f,
Node* node);
5705onig_compile(
regex_t* reg,
const UChar* pattern,
const UChar* pattern_end,
5708 return onig_compile_ruby(reg, pattern, pattern_end, einfo, NULL, 0);
5714onig_compile_ruby(
regex_t* reg,
const UChar* pattern,
const UChar* pattern_end,
5715 OnigErrorInfo* einfo,
const char *sourcefile,
int sourceline)
5718onig_compile(
regex_t* reg,
const UChar* pattern,
const UChar* pattern_end,
5722#define COMPILE_INIT_SIZE 20
5725 OnigDistance init_size;
5728#ifdef USE_SUBEXP_CALL
5732 if (IS_NOT_NULL(einfo)) einfo->par = (UChar* )NULL;
5735 scan_env.sourcefile = sourcefile;
5736 scan_env.sourceline = sourceline;
5740 print_enc_string(stderr, reg->enc, pattern, pattern_end);
5743 if (reg->alloc == 0) {
5744 init_size = (pattern_end - pattern) * 2;
5745 if (init_size <= 0) init_size = COMPILE_INIT_SIZE;
5746 r = BBUF_INIT(reg, init_size);
5747 if (r != 0)
goto end;
5753 reg->num_repeat = 0;
5754 reg->num_null_check = 0;
5755 reg->repeat_range_alloc = 0;
5757#ifdef USE_COMBINATION_EXPLOSION_CHECK
5758 reg->num_comb_exp_check = 0;
5761 r = onig_parse_make_tree(&root, pattern, pattern_end, reg, &scan_env);
5762 if (r != 0)
goto err;
5764#ifdef ONIG_DEBUG_PARSE_TREE
5766 fprintf(stderr,
"ORIGINAL PARSE TREE:\n");
5767 print_tree(stderr, root);
5771#ifdef USE_NAMED_GROUP
5773 if (scan_env.num_named > 0 &&
5774 IS_SYNTAX_BV(scan_env.syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
5775 !ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_CAPTURE_GROUP)) {
5776 if (scan_env.num_named != scan_env.num_mem)
5777 r = disable_noname_group_capture(&root, reg, &scan_env);
5779 r = numbered_ref_check(root);
5781 if (r != 0)
goto err;
5785#ifdef USE_SUBEXP_CALL
5786 if (scan_env.num_call > 0) {
5787 r = unset_addr_list_init(&uslist, scan_env.num_call);
5788 if (r != 0)
goto err;
5789 scan_env.unset_addr_list = &uslist;
5790 r = setup_subexp_call(root, &scan_env);
5791 if (r != 0)
goto err_unset;
5792 r = subexp_recursive_check_trav(root, &scan_env);
5793 if (r < 0)
goto err_unset;
5794 r = subexp_inf_recursive_check_trav(root, &scan_env);
5795 if (r != 0)
goto err_unset;
5797 reg->num_call = scan_env.num_call;
5803 r = setup_tree(root, reg, 0, &scan_env);
5804 if (r != 0)
goto err_unset;
5806#ifdef ONIG_DEBUG_PARSE_TREE
5807 print_tree(stderr, root);
5810 reg->capture_history = scan_env.capture_history;
5811 reg->bt_mem_start = scan_env.bt_mem_start;
5812 reg->bt_mem_start |= reg->capture_history;
5813 if (IS_FIND_CONDITION(reg->options))
5814 BIT_STATUS_ON_ALL(reg->bt_mem_end);
5816 reg->bt_mem_end = scan_env.bt_mem_end;
5817 reg->bt_mem_end |= reg->capture_history;
5820#ifdef USE_COMBINATION_EXPLOSION_CHECK
5821 if (scan_env.backrefed_mem == 0
5822# ifdef USE_SUBEXP_CALL
5823 || scan_env.num_call == 0
5826 setup_comb_exp_check(root, 0, &scan_env);
5827# ifdef USE_SUBEXP_CALL
5828 if (scan_env.has_recursion != 0) {
5829 scan_env.num_comb_exp_check = 0;
5833 if (scan_env.comb_exp_max_regnum > 0) {
5835 for (i = 1; i <= scan_env.comb_exp_max_regnum; i++) {
5836 if (BIT_STATUS_AT(scan_env.backrefed_mem, i) != 0) {
5837 scan_env.num_comb_exp_check = 0;
5844 reg->num_comb_exp_check = scan_env.num_comb_exp_check;
5847 clear_optimize_info(reg);
5848#ifndef ONIG_DONT_OPTIMIZE
5849 r = set_optimize_info_from_tree(root, reg, &scan_env);
5850 if (r != 0)
goto err_unset;
5853 if (IS_NOT_NULL(scan_env.mem_nodes_dynamic)) {
5854 xfree(scan_env.mem_nodes_dynamic);
5855 scan_env.mem_nodes_dynamic = (
Node** )NULL;
5858 r = compile_tree(root, reg);
5860 r = add_opcode(reg, OP_END);
5861#ifdef USE_SUBEXP_CALL
5862 if (scan_env.num_call > 0) {
5863 r = unset_addr_list_fix(&uslist, reg);
5864 unset_addr_list_end(&uslist);
5869 if ((reg->num_repeat != 0) || (reg->bt_mem_end != 0))
5870 reg->stack_pop_level = STACK_POP_LEVEL_ALL;
5872 if (reg->bt_mem_start != 0)
5873 reg->stack_pop_level = STACK_POP_LEVEL_MEM_START;
5875 reg->stack_pop_level = STACK_POP_LEVEL_FREE;
5878#ifdef USE_SUBEXP_CALL
5879 else if (scan_env.num_call > 0) {
5880 unset_addr_list_end(&uslist);
5883 onig_node_free(root);
5885#ifdef ONIG_DEBUG_COMPILE
5886# ifdef USE_NAMED_GROUP
5887 onig_print_names(stderr, reg);
5889 print_compiled_byte_code_list(stderr, reg);
5893 onig_reg_resize(reg);
5897#ifdef USE_SUBEXP_CALL
5898 if (scan_env.num_call > 0) {
5899 unset_addr_list_end(&uslist);
5903 if (IS_NOT_NULL(scan_env.error)) {
5904 if (IS_NOT_NULL(einfo)) {
5905 einfo->enc = scan_env.enc;
5906 einfo->par = scan_env.error;
5907 einfo->par_end = scan_env.error_end;
5911 onig_node_free(root);
5912 xfree(scan_env.mem_nodes_dynamic);
5917static int onig_inited = 0;
5920onig_reg_init(
regex_t* reg, OnigOptionType option,
5921 OnigCaseFoldType case_fold_flag,
5928 return ONIGERR_INVALID_ARGUMENT;
5930 (reg)->exact = (UChar* )NULL;
5931 (reg)->chain = (
regex_t* )NULL;
5932 (reg)->p = (UChar* )NULL;
5933 (reg)->name_table = (
void* )NULL;
5936 if (ONIGENC_IS_UNDEF(enc))
5937 return ONIGERR_DEFAULT_ENCODING_IS_NOT_SET;
5939 if ((option & (ONIG_OPTION_DONT_CAPTURE_GROUP|ONIG_OPTION_CAPTURE_GROUP))
5940 == (ONIG_OPTION_DONT_CAPTURE_GROUP|ONIG_OPTION_CAPTURE_GROUP)) {
5941 return ONIGERR_INVALID_COMBINATION_OF_OPTIONS;
5944 if ((option & ONIG_OPTION_NEGATE_SINGLELINE) != 0) {
5945 option |= syntax->options;
5946 option &= ~ONIG_OPTION_SINGLELINE;
5949 option |= syntax->options;
5952 (reg)->options = option;
5953 (reg)->syntax = syntax;
5954 (reg)->optimize = 0;
5959 (reg)->case_fold_flag = case_fold_flag;
5961 (reg)->timelimit = 0;
5967onig_new_without_alloc(
regex_t* reg,
const UChar* pattern,
5968 const UChar* pattern_end, OnigOptionType option,
OnigEncoding enc,
5973 r = onig_reg_init(reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax);
5976 r = onig_compile(reg, pattern, pattern_end, einfo);
5981onig_new(
regex_t** reg,
const UChar* pattern,
const UChar* pattern_end,
5986 if (IS_NULL(*reg))
return ONIGERR_MEMORY;
5988 int r = onig_new_without_alloc(*reg, pattern, pattern_end, option, enc, syntax, einfo);
5998onig_initialize(
OnigEncoding encodings[] ARG_UNUSED,
int n ARG_UNUSED)
6006 if (onig_inited != 0)
6011#if defined(ONIG_DEBUG_MEMLEAK) && defined(_MSC_VER)
6012 _CrtSetDbgFlag(_CRTDBG_ALLOC_MEM_DF | _CRTDBG_LEAK_CHECK_DF);
6018#ifdef ONIG_DEBUG_STATISTICS
6019 onig_statistics_init();
6028extern void onig_add_end_call(
void (*func)(
void))
6033 if (item == 0) return ;
6035 item->next = EndCallTop;
6042exec_end_call_list(
void)
6047 while (EndCallTop != 0) {
6048 func = EndCallTop->func;
6052 EndCallTop = EndCallTop->next;
6060 exec_end_call_list();
6062#ifdef ONIG_DEBUG_STATISTICS
6063 onig_print_statistics(stderr);
6066#if defined(ONIG_DEBUG_MEMLEAK) && defined(_MSC_VER)
6067 _CrtDumpMemoryLeaks();
6076onig_is_in_code_range(
const UChar* p, OnigCodePoint code)
6078 OnigCodePoint n, *data;
6079 OnigCodePoint low, high, x;
6081 GET_CODE_POINT(n, p);
6082 data = (OnigCodePoint* )p;
6085 for (low = 0, high = n; low < high; ) {
6086 x = (low + high) >> 1;
6087 if (code > data[x * 2 + 1])
6093 return ((low < n && code >= data[low * 2]) ? 1 : 0);
6097onig_is_code_in_cc_len(
int elen, OnigCodePoint code,
CClassNode* cc)
6101 if (elen > 1 || (code >= SINGLE_BYTE_SIZE)) {
6102 if (IS_NULL(cc->mbuf)) {
6106 found = (onig_is_in_code_range(cc->mbuf->p, code) != 0 ? 1 : 0);
6110 found = (BITSET_AT(cc->bs, code) == 0 ? 0 : 1);
6113 if (IS_NCCLASS_NOT(cc))
6124 if (ONIGENC_MBC_MINLEN(enc) > 1) {
6128 len = ONIGENC_CODE_TO_MBCLEN(enc, code);
6130 return onig_is_code_in_cc_len(
len, code, cc);
6137# define ARG_SPECIAL -1
6139# define ARG_RELADDR 1
6140# define ARG_ABSADDR 2
6141# define ARG_LENGTH 3
6142# define ARG_MEMNUM 4
6143# define ARG_OPTION 5
6144# define ARG_STATE_CHECK 6
6146OnigOpInfoType OnigOpInfo[] = {
6147 { OP_FINISH,
"finish", ARG_NON },
6148 { OP_END,
"end", ARG_NON },
6149 { OP_EXACT1,
"exact1", ARG_SPECIAL },
6150 { OP_EXACT2,
"exact2", ARG_SPECIAL },
6151 { OP_EXACT3,
"exact3", ARG_SPECIAL },
6152 { OP_EXACT4,
"exact4", ARG_SPECIAL },
6153 { OP_EXACT5,
"exact5", ARG_SPECIAL },
6154 { OP_EXACTN,
"exactn", ARG_SPECIAL },
6155 { OP_EXACTMB2N1,
"exactmb2-n1", ARG_SPECIAL },
6156 { OP_EXACTMB2N2,
"exactmb2-n2", ARG_SPECIAL },
6157 { OP_EXACTMB2N3,
"exactmb2-n3", ARG_SPECIAL },
6158 { OP_EXACTMB2N,
"exactmb2-n", ARG_SPECIAL },
6159 { OP_EXACTMB3N,
"exactmb3n" , ARG_SPECIAL },
6160 { OP_EXACTMBN,
"exactmbn", ARG_SPECIAL },
6161 { OP_EXACT1_IC,
"exact1-ic", ARG_SPECIAL },
6162 { OP_EXACTN_IC,
"exactn-ic", ARG_SPECIAL },
6163 { OP_CCLASS,
"cclass", ARG_SPECIAL },
6164 { OP_CCLASS_MB,
"cclass-mb", ARG_SPECIAL },
6165 { OP_CCLASS_MIX,
"cclass-mix", ARG_SPECIAL },
6166 { OP_CCLASS_NOT,
"cclass-not", ARG_SPECIAL },
6167 { OP_CCLASS_MB_NOT,
"cclass-mb-not", ARG_SPECIAL },
6168 { OP_CCLASS_MIX_NOT,
"cclass-mix-not", ARG_SPECIAL },
6169 { OP_ANYCHAR,
"anychar", ARG_NON },
6170 { OP_ANYCHAR_ML,
"anychar-ml", ARG_NON },
6171 { OP_ANYCHAR_STAR,
"anychar*", ARG_NON },
6172 { OP_ANYCHAR_ML_STAR,
"anychar-ml*", ARG_NON },
6173 { OP_ANYCHAR_STAR_PEEK_NEXT,
"anychar*-peek-next", ARG_SPECIAL },
6174 { OP_ANYCHAR_ML_STAR_PEEK_NEXT,
"anychar-ml*-peek-next", ARG_SPECIAL },
6175 { OP_WORD,
"word", ARG_NON },
6176 { OP_NOT_WORD,
"not-word", ARG_NON },
6177 { OP_WORD_BOUND,
"word-bound", ARG_NON },
6178 { OP_NOT_WORD_BOUND,
"not-word-bound", ARG_NON },
6179 { OP_WORD_BEGIN,
"word-begin", ARG_NON },
6180 { OP_WORD_END,
"word-end", ARG_NON },
6181 { OP_ASCII_WORD,
"ascii-word", ARG_NON },
6182 { OP_NOT_ASCII_WORD,
"not-ascii-word", ARG_NON },
6183 { OP_ASCII_WORD_BOUND,
"ascii-word-bound", ARG_NON },
6184 { OP_NOT_ASCII_WORD_BOUND,
"not-ascii-word-bound", ARG_NON },
6185 { OP_ASCII_WORD_BEGIN,
"ascii-word-begin", ARG_NON },
6186 { OP_ASCII_WORD_END,
"ascii-word-end", ARG_NON },
6187 { OP_BEGIN_BUF,
"begin-buf", ARG_NON },
6188 { OP_END_BUF,
"end-buf", ARG_NON },
6189 { OP_BEGIN_LINE,
"begin-line", ARG_NON },
6190 { OP_END_LINE,
"end-line", ARG_NON },
6191 { OP_SEMI_END_BUF,
"semi-end-buf", ARG_NON },
6192 { OP_BEGIN_POSITION,
"begin-position", ARG_NON },
6193 { OP_BACKREF1,
"backref1", ARG_NON },
6194 { OP_BACKREF2,
"backref2", ARG_NON },
6195 { OP_BACKREFN,
"backrefn", ARG_MEMNUM },
6196 { OP_BACKREFN_IC,
"backrefn-ic", ARG_SPECIAL },
6197 { OP_BACKREF_MULTI,
"backref_multi", ARG_SPECIAL },
6198 { OP_BACKREF_MULTI_IC,
"backref_multi-ic", ARG_SPECIAL },
6199 { OP_BACKREF_WITH_LEVEL,
"backref_at_level", ARG_SPECIAL },
6200 { OP_MEMORY_START_PUSH,
"mem-start-push", ARG_MEMNUM },
6201 { OP_MEMORY_START,
"mem-start", ARG_MEMNUM },
6202 { OP_MEMORY_END_PUSH,
"mem-end-push", ARG_MEMNUM },
6203 { OP_MEMORY_END_PUSH_REC,
"mem-end-push-rec", ARG_MEMNUM },
6204 { OP_MEMORY_END,
"mem-end", ARG_MEMNUM },
6205 { OP_MEMORY_END_REC,
"mem-end-rec", ARG_MEMNUM },
6206 { OP_SET_OPTION_PUSH,
"set-option-push", ARG_OPTION },
6207 { OP_SET_OPTION,
"set-option", ARG_OPTION },
6208 { OP_KEEP,
"keep", ARG_NON },
6209 { OP_FAIL,
"fail", ARG_NON },
6210 { OP_JUMP,
"jump", ARG_RELADDR },
6211 { OP_PUSH,
"push", ARG_RELADDR },
6212 { OP_POP,
"pop", ARG_NON },
6213 { OP_PUSH_OR_JUMP_EXACT1,
"push-or-jump-e1", ARG_SPECIAL },
6214 { OP_PUSH_IF_PEEK_NEXT,
"push-if-peek-next", ARG_SPECIAL },
6215 { OP_REPEAT,
"repeat", ARG_SPECIAL },
6216 { OP_REPEAT_NG,
"repeat-ng", ARG_SPECIAL },
6217 { OP_REPEAT_INC,
"repeat-inc", ARG_MEMNUM },
6218 { OP_REPEAT_INC_NG,
"repeat-inc-ng", ARG_MEMNUM },
6219 { OP_REPEAT_INC_SG,
"repeat-inc-sg", ARG_MEMNUM },
6220 { OP_REPEAT_INC_NG_SG,
"repeat-inc-ng-sg", ARG_MEMNUM },
6221 { OP_NULL_CHECK_START,
"null-check-start", ARG_MEMNUM },
6222 { OP_NULL_CHECK_END,
"null-check-end", ARG_MEMNUM },
6223 { OP_NULL_CHECK_END_MEMST,
"null-check-end-memst", ARG_MEMNUM },
6224 { OP_NULL_CHECK_END_MEMST_PUSH,
"null-check-end-memst-push", ARG_MEMNUM },
6225 { OP_PUSH_POS,
"push-pos", ARG_NON },
6226 { OP_POP_POS,
"pop-pos", ARG_NON },
6227 { OP_PUSH_POS_NOT,
"push-pos-not", ARG_RELADDR },
6228 { OP_FAIL_POS,
"fail-pos", ARG_NON },
6229 { OP_PUSH_STOP_BT,
"push-stop-bt", ARG_NON },
6230 { OP_POP_STOP_BT,
"pop-stop-bt", ARG_NON },
6231 { OP_LOOK_BEHIND,
"look-behind", ARG_SPECIAL },
6232 { OP_PUSH_LOOK_BEHIND_NOT,
"push-look-behind-not", ARG_SPECIAL },
6233 { OP_FAIL_LOOK_BEHIND_NOT,
"fail-look-behind-not", ARG_NON },
6234 { OP_PUSH_ABSENT_POS,
"push-absent-pos", ARG_NON },
6235 { OP_ABSENT,
"absent", ARG_RELADDR },
6236 { OP_ABSENT_END,
"absent-end", ARG_NON },
6237 { OP_CALL,
"call", ARG_ABSADDR },
6238 { OP_RETURN,
"return", ARG_NON },
6239 { OP_CONDITION,
"condition", ARG_SPECIAL },
6240 { OP_STATE_CHECK_PUSH,
"state-check-push", ARG_SPECIAL },
6241 { OP_STATE_CHECK_PUSH_OR_JUMP,
"state-check-push-or-jump", ARG_SPECIAL },
6242 { OP_STATE_CHECK,
"state-check", ARG_STATE_CHECK },
6243 { OP_STATE_CHECK_ANYCHAR_STAR,
"state-check-anychar*", ARG_STATE_CHECK },
6244 { OP_STATE_CHECK_ANYCHAR_ML_STAR,
6245 "state-check-anychar-ml*", ARG_STATE_CHECK },
6254 for (i = 0; OnigOpInfo[i].opcode >= 0; i++) {
6255 if (opcode == OnigOpInfo[i].opcode)
6256 return OnigOpInfo[i].name;
6262op2arg_type(
int opcode)
6266 for (i = 0; OnigOpInfo[i].opcode >= 0; i++) {
6267 if (opcode == OnigOpInfo[i].opcode)
6268 return OnigOpInfo[i].arg_type;
6273# ifdef ONIG_DEBUG_PARSE_TREE
6275Indent(
FILE* f,
int indent)
6278 for (i = 0; i < indent; i++) putc(
' ', f);
6283p_string(
FILE* f, ptrdiff_t
len, UChar* s)
6286 while (
len-- > 0) { fputc(*s++, f); }
6290p_len_string(
FILE* f, LengthType
len,
int mb_len, UChar* s)
6292 int x =
len * mb_len;
6294 fprintf(f,
":%d:",
len);
6295 while (x-- > 0) { fputc(*s++, f); }
6299onig_print_compiled_byte_code(
FILE* f, UChar* bp, UChar* bpend, UChar** nextp,
6306 StateCheckNumType scn;
6310 fprintf(f,
"[%s", op2name(*bp));
6311 arg_type = op2arg_type(*bp);
6312 if (arg_type != ARG_SPECIAL) {
6318 GET_RELADDR_INC(addr, bp);
6319 fprintf(f,
":(%s%d)", (addr >= 0) ?
"+" :
"", addr);
6322 GET_ABSADDR_INC(addr, bp);
6323 fprintf(f,
":(%d)", addr);
6326 GET_LENGTH_INC(
len, bp);
6327 fprintf(f,
":%d",
len);
6330 mem = *((MemNumType* )bp);
6332 fprintf(f,
":%d", mem);
6336 OnigOptionType option = *((OnigOptionType* )bp);
6338 fprintf(f,
":%d", option);
6342 case ARG_STATE_CHECK:
6343 scn = *((StateCheckNumType* )bp);
6344 bp += SIZE_STATE_CHECK_NUM;
6345 fprintf(f,
":%d", scn);
6352 case OP_ANYCHAR_STAR_PEEK_NEXT:
6353 case OP_ANYCHAR_ML_STAR_PEEK_NEXT:
6354 p_string(f, 1, bp++);
break;
6356 p_string(f, 2, bp); bp += 2;
break;
6358 p_string(f, 3, bp); bp += 3;
break;
6360 p_string(f, 4, bp); bp += 4;
break;
6362 p_string(f, 5, bp); bp += 5;
break;
6364 GET_LENGTH_INC(
len, bp);
6365 p_len_string(f,
len, 1, bp);
6370 p_string(f, 2, bp); bp += 2;
break;
6372 p_string(f, 4, bp); bp += 4;
break;
6374 p_string(f, 6, bp); bp += 6;
break;
6376 GET_LENGTH_INC(
len, bp);
6377 p_len_string(f,
len, 2, bp);
6381 GET_LENGTH_INC(
len, bp);
6382 p_len_string(f,
len, 3, bp);
6389 GET_LENGTH_INC(mb_len, bp);
6390 GET_LENGTH_INC(
len, bp);
6391 fprintf(f,
":%d:%d:", mb_len,
len);
6393 while (n-- > 0) { fputc(*bp++, f); }
6398 len = enclen(enc, bp, bpend);
6399 p_string(f,
len, bp);
6403 GET_LENGTH_INC(
len, bp);
6404 p_len_string(f,
len, 1, bp);
6409 n = bitset_on_num((BitSetRef )bp);
6411 fprintf(f,
":%d", n);
6415 n = bitset_on_num((BitSetRef )bp);
6417 fprintf(f,
":%d", n);
6421 case OP_CCLASS_MB_NOT:
6422 GET_LENGTH_INC(
len, bp);
6424# ifndef PLATFORM_UNALIGNED_WORD_ACCESS
6427 GET_CODE_POINT(code, q);
6429 fprintf(f,
":%d:%d", (
int )code,
len);
6433 case OP_CCLASS_MIX_NOT:
6434 n = bitset_on_num((BitSetRef )bp);
6436 GET_LENGTH_INC(
len, bp);
6438# ifndef PLATFORM_UNALIGNED_WORD_ACCESS
6441 GET_CODE_POINT(code, q);
6443 fprintf(f,
":%d:%d:%d", n, (
int )code,
len);
6446 case OP_BACKREFN_IC:
6447 mem = *((MemNumType* )bp);
6449 fprintf(f,
":%d", mem);
6452 case OP_BACKREF_MULTI_IC:
6453 case OP_BACKREF_MULTI:
6455 GET_LENGTH_INC(
len, bp);
6456 for (i = 0; i <
len; i++) {
6457 GET_MEMNUM_INC(mem, bp);
6458 if (i > 0) fputs(
", ", f);
6459 fprintf(f,
"%d", mem);
6463 case OP_BACKREF_WITH_LEVEL:
6465 OnigOptionType option;
6468 GET_OPTION_INC(option, bp);
6469 fprintf(f,
":%d", option);
6470 GET_LENGTH_INC(level, bp);
6471 fprintf(f,
":%d", level);
6474 GET_LENGTH_INC(
len, bp);
6475 for (i = 0; i <
len; i++) {
6476 GET_MEMNUM_INC(mem, bp);
6477 if (i > 0) fputs(
", ", f);
6478 fprintf(f,
"%d", mem);
6486 mem = *((MemNumType* )bp);
6488 addr = *((RelAddrType* )bp);
6490 fprintf(f,
":%d:%d", mem, addr);
6494 case OP_PUSH_OR_JUMP_EXACT1:
6495 case OP_PUSH_IF_PEEK_NEXT:
6496 addr = *((RelAddrType* )bp);
6498 fprintf(f,
":(%s%d)", (addr >= 0) ?
"+" :
"", addr);
6503 case OP_LOOK_BEHIND:
6504 GET_LENGTH_INC(
len, bp);
6505 fprintf(f,
":%d",
len);
6508 case OP_PUSH_LOOK_BEHIND_NOT:
6509 GET_RELADDR_INC(addr, bp);
6510 GET_LENGTH_INC(
len, bp);
6511 fprintf(f,
":%d:(%s%d)",
len, (addr >= 0) ?
"+" :
"", addr);
6514 case OP_STATE_CHECK_PUSH:
6515 case OP_STATE_CHECK_PUSH_OR_JUMP:
6516 scn = *((StateCheckNumType* )bp);
6517 bp += SIZE_STATE_CHECK_NUM;
6518 addr = *((RelAddrType* )bp);
6520 fprintf(f,
":%d:(%s%d)", scn, (addr >= 0) ?
"+" :
"", addr);
6524 GET_MEMNUM_INC(mem, bp);
6525 GET_RELADDR_INC(addr, bp);
6526 fprintf(f,
":%d:(%s%d)", mem, (addr >= 0) ?
"+" :
"", addr);
6530 fprintf(stderr,
"onig_print_compiled_byte_code: undefined code %d\n",
6535 if (nextp) *nextp = bp;
6538# ifdef ONIG_DEBUG_COMPILE
6540print_compiled_byte_code_list(
FILE* f,
regex_t* reg)
6544 UChar* end = reg->p + reg->used;
6546 fprintf(f,
"code length: %d", reg->used);
6552 fprintf(f,
"\n%ld:", bp - reg->p);
6554 fprintf(f,
" %ld:", bp - reg->p);
6555 onig_print_compiled_byte_code(f, bp, end, &bp, reg->enc);
6562# ifdef ONIG_DEBUG_PARSE_TREE
6564print_indent_tree(
FILE* f,
Node* node,
int indent)
6566 int i,
type, container_p = 0;
6571 if (IS_NULL(node)) {
6572 fprintf(f,
"ERROR: null node!!!\n");
6580 if (NTYPE(node) == NT_LIST)
6581 fprintf(f,
"<list:%"PRIxPTR
">\n", (intptr_t )node);
6583 fprintf(f,
"<alt:%"PRIxPTR
">\n", (intptr_t )node);
6585 print_indent_tree(f, NCAR(node), indent + add);
6586 while (IS_NOT_NULL(node = NCDR(node))) {
6587 if (NTYPE(node) != type) {
6588 fprintf(f,
"ERROR: list/alt right is not a cons. %d\n", NTYPE(node));
6591 print_indent_tree(f, NCAR(node), indent + add);
6596 fprintf(f,
"<string%s:%"PRIxPTR
">",
6597 (NSTRING_IS_RAW(node) ?
"-raw" :
""), (intptr_t )node);
6598 for (p = NSTR(node)->s; p < NSTR(node)->end; p++) {
6599 if (*p >= 0x20 && *p < 0x7f)
6602 fprintf(f,
" 0x%02x", *p);
6608 fprintf(f,
"<cclass:%"PRIxPTR
">", (intptr_t )node);
6609 if (IS_NCCLASS_NOT(NCCLASS(node))) fputs(
"not ", f);
6610 if (NCCLASS(node)->mbuf) {
6611 BBuf* bbuf = NCCLASS(node)->mbuf;
6612 OnigCodePoint* data = (OnigCodePoint* )bbuf->p;
6613 OnigCodePoint* end = (OnigCodePoint* )(bbuf->p + bbuf->used);
6614 fprintf(f,
"%d", *data++);
6615 for (; data < end; data+=2) {
6617 fprintf(f,
"%04x-%04x", data[0], data[1]);
6623 fprintf(f,
"<ctype:%"PRIxPTR
"> ", (intptr_t )node);
6624 switch (NCTYPE(node)->ctype) {
6625 case ONIGENC_CTYPE_WORD:
6626 if (NCTYPE(node)->not != 0)
6627 fputs(
"not word", f);
6633 fprintf(f,
"ERROR: undefined ctype.\n");
6639 fprintf(f,
"<anychar:%"PRIxPTR
">", (intptr_t )node);
6643 fprintf(f,
"<anchor:%"PRIxPTR
"> ", (intptr_t )node);
6644 switch (NANCHOR(node)->type) {
6645 case ANCHOR_BEGIN_BUF: fputs(
"begin buf", f);
break;
6646 case ANCHOR_END_BUF: fputs(
"end buf", f);
break;
6647 case ANCHOR_BEGIN_LINE: fputs(
"begin line", f);
break;
6648 case ANCHOR_END_LINE: fputs(
"end line", f);
break;
6649 case ANCHOR_SEMI_END_BUF: fputs(
"semi end buf", f);
break;
6650 case ANCHOR_BEGIN_POSITION: fputs(
"begin position", f);
break;
6652 case ANCHOR_WORD_BOUND: fputs(
"word bound", f);
break;
6653 case ANCHOR_NOT_WORD_BOUND: fputs(
"not word bound", f);
break;
6654# ifdef USE_WORD_BEGIN_END
6655 case ANCHOR_WORD_BEGIN: fputs(
"word begin", f);
break;
6656 case ANCHOR_WORD_END: fputs(
"word end", f);
break;
6658 case ANCHOR_PREC_READ: fputs(
"prec read", f); container_p = TRUE;
break;
6659 case ANCHOR_PREC_READ_NOT: fputs(
"prec read not", f); container_p = TRUE;
break;
6660 case ANCHOR_LOOK_BEHIND: fputs(
"look_behind", f); container_p = TRUE;
break;
6661 case ANCHOR_LOOK_BEHIND_NOT: fputs(
"look_behind_not",f); container_p = TRUE;
break;
6662 case ANCHOR_KEEP: fputs(
"keep",f);
break;
6665 fprintf(f,
"ERROR: undefined anchor type.\n");
6675 fprintf(f,
"<backref:%"PRIxPTR
">", (intptr_t )node);
6676 for (i = 0; i < br->back_num; i++) {
6677 if (i > 0) fputs(
", ", f);
6678 fprintf(f,
"%d", p[i]);
6683# ifdef USE_SUBEXP_CALL
6687 fprintf(f,
"<call:%"PRIxPTR
">", (intptr_t )node);
6688 p_string(f, cn->name_end - cn->name, cn->name);
6694 fprintf(f,
"<quantifier:%"PRIxPTR
">{%d,%d}%s\n", (intptr_t )node,
6695 NQTFR(node)->lower, NQTFR(node)->upper,
6696 (NQTFR(node)->greedy ?
"" :
"?"));
6697 print_indent_tree(f, NQTFR(node)->target, indent + add);
6701 fprintf(f,
"<enclose:%"PRIxPTR
"> ", (intptr_t )node);
6702 switch (NENCLOSE(node)->type) {
6703 case ENCLOSE_OPTION:
6704 fprintf(f,
"option:%d", NENCLOSE(node)->option);
6706 case ENCLOSE_MEMORY:
6707 fprintf(f,
"memory:%d", NENCLOSE(node)->regnum);
6709 case ENCLOSE_STOP_BACKTRACK:
6710 fprintf(f,
"stop-bt");
6712 case ENCLOSE_CONDITION:
6713 fprintf(f,
"condition:%d", NENCLOSE(node)->regnum);
6715 case ENCLOSE_ABSENT:
6716 fprintf(f,
"absent");
6723 print_indent_tree(f, NENCLOSE(node)->target, indent + add);
6727 fprintf(f,
"print_indent_tree: undefined node type %d\n", NTYPE(node));
6731 if (type != NT_LIST && type != NT_ALT && type != NT_QTFR &&
6735 if (container_p) print_indent_tree(f, NANCHOR(node)->target, indent + add);
6743 print_indent_tree(f, node, 0);
#define xfree
Old name of ruby_xfree.
#define xrealloc
Old name of ruby_xrealloc.
#define xmalloc
Old name of ruby_xmalloc.
int len
Length of the buffer.
VALUE type(ANYARGS)
ANYARGS-ed function type.