14#include "ruby/internal/config.h"
24#include "debug_counter.h"
28#include "internal/array.h"
29#include "internal/compar.h"
30#include "internal/compilers.h"
31#include "internal/concurrent_set.h"
32#include "internal/encoding.h"
33#include "internal/error.h"
34#include "internal/gc.h"
35#include "internal/hash.h"
36#include "internal/numeric.h"
37#include "internal/object.h"
38#include "internal/proc.h"
39#include "internal/re.h"
40#include "internal/sanitizers.h"
41#include "internal/string.h"
42#include "internal/transcode.h"
49#include "ruby_assert.h"
54#if defined HAVE_CRYPT_R
55# if defined HAVE_CRYPT_H
58#elif !defined HAVE_CRYPT
59# include "missing/crypt.h"
60# define HAVE_CRYPT_R 1
64#undef rb_usascii_str_new
68#undef rb_usascii_str_new_cstr
69#undef rb_utf8_str_new_cstr
70#undef rb_enc_str_new_cstr
71#undef rb_external_str_new_cstr
72#undef rb_locale_str_new_cstr
73#undef rb_str_dup_frozen
74#undef rb_str_buf_new_cstr
/* NOTE(review): no use of RUBY_MAX_CHAR_LEN is visible in this part of the
 * file; presumably an upper bound on bytes per character -- confirm. */
128#define RUBY_MAX_CHAR_LEN 16
/* Set by str_store_precomputed_hash() after it copies the hash value just
 * past the string's terminator; rb_str_size_as_embedded() adds
 * sizeof(st_index_t) to the capacity when this flag is set. */
129#define STR_PRECOMPUTED_HASH FL_USER4
/* Set by STR_SET_SHARED on the string that other strings point to as their
 * `shared` buffer owner. */
130#define STR_SHARED_ROOT FL_USER5
/* Set by STR_SET_SHARED on a shared root whose RBASIC_CLASS is 0. */
131#define STR_BORROWED FL_USER6
/* Tested by rb_check_lockedtmp(); also part of STR_UNMODIFIABLE_MASK. */
132#define STR_TMPLOCK FL_USER7
/* Set by str_new_static(); the free paths in this file only release the heap
 * buffer when neither STR_SHARED nor STR_NOFREE is set. */
133#define STR_NOFREE FL_USER18
/* STR_SET_SHARED performs its work only when this flag is NOT set on `str`.
 * NOTE(review): presumably marks the stack-allocated strings built by
 * setup_fake_str() -- confirm where the flag is set. */
134#define STR_FAKESTR FL_USER19
136#define STR_SET_NOEMBED(str) do {\
137 FL_SET((str), STR_NOEMBED);\
138 FL_UNSET((str), STR_SHARED | STR_SHARED_ROOT | STR_BORROWED);\
/* Mark `str` as embedded by clearing STR_NOEMBED, and drop the STR_SHARED and
 * STR_NOFREE flags in the same FL_UNSET call. */
140#define STR_SET_EMBED(str) FL_UNSET((str), STR_NOEMBED | STR_SHARED | STR_NOFREE)
142#define STR_SET_LEN(str, n) do { \
143 RSTRING(str)->len = (n); \
/* Terminator length for `str`: 1 when rb_str_enc_fastpath(str) is true,
 * otherwise rb_enc_mbminlen() of the encoding looked up from
 * ENCODING_GET(str). Note that `str` is evaluated more than once. */
146#define TERM_LEN(str) (rb_str_enc_fastpath(str) ? 1 : rb_enc_mbminlen(rb_enc_from_index(ENCODING_GET(str))))
147#define TERM_FILL(ptr, termlen) do {\
148 char *const term_fill_ptr = (ptr);\
149 const int term_fill_len = (termlen);\
150 *term_fill_ptr = '\0';\
151 if (UNLIKELY(term_fill_len > 1))\
152 memset(term_fill_ptr, 0, term_fill_len);\
155#define RESIZE_CAPA(str,capacity) do {\
156 const int termlen = TERM_LEN(str);\
157 RESIZE_CAPA_TERM(str,capacity,termlen);\
159#define RESIZE_CAPA_TERM(str,capacity,termlen) do {\
160 if (STR_EMBED_P(str)) {\
161 if (str_embed_capa(str) < capacity + termlen) {\
162 char *const tmp = ALLOC_N(char, (size_t)(capacity) + (termlen));\
163 const long tlen = RSTRING_LEN(str);\
164 memcpy(tmp, RSTRING_PTR(str), str_embed_capa(str));\
165 RSTRING(str)->as.heap.ptr = tmp;\
166 RSTRING(str)->len = tlen;\
167 STR_SET_NOEMBED(str);\
168 RSTRING(str)->as.heap.aux.capa = (capacity);\
172 RUBY_ASSERT(!FL_TEST((str), STR_SHARED)); \
173 SIZED_REALLOC_N(RSTRING(str)->as.heap.ptr, char, \
174 (size_t)(capacity) + (termlen), STR_HEAP_SIZE(str)); \
175 RSTRING(str)->as.heap.aux.capa = (capacity);\
179#define STR_SET_SHARED(str, shared_str) do { \
180 if (!FL_TEST(str, STR_FAKESTR)) { \
181 RUBY_ASSERT(RSTRING_PTR(shared_str) <= RSTRING_PTR(str)); \
182 RUBY_ASSERT(RSTRING_PTR(str) <= RSTRING_PTR(shared_str) + RSTRING_LEN(shared_str)); \
183 RB_OBJ_WRITE((str), &RSTRING(str)->as.heap.aux.shared, (shared_str)); \
184 FL_SET((str), STR_SHARED); \
185 rb_gc_register_pinning_obj(str); \
186 FL_SET((shared_str), STR_SHARED_ROOT); \
187 if (RBASIC_CLASS((shared_str)) == 0) \
188 FL_SET_RAW((shared_str), STR_BORROWED); \
/* Heap buffer pointer of a non-embedded string (reads as.heap.ptr directly). */
192#define STR_HEAP_PTR(str) (RSTRING(str)->as.heap.ptr)
/* Heap capacity (as.heap.aux.capa) plus the terminator length, as size_t.
 * This is the size passed to SIZED_FREE_N / SIZED_REALLOC_N in this file. */
193#define STR_HEAP_SIZE(str) ((size_t)RSTRING(str)->as.heap.aux.capa + TERM_LEN(str))
/* Shorthand for the file-local get_encoding(str). */
196#define STR_ENC_GET(str) get_encoding(str)
199zero_filled(
const char *s,
int n)
202 if (*s++)
return false;
207#if !defined SHARABLE_MIDDLE_SUBSTRING
208# define SHARABLE_MIDDLE_SUBSTRING 0
212SHARABLE_SUBSTRING_P(
VALUE str,
long beg,
long len)
214#if SHARABLE_MIDDLE_SUBSTRING
217 long end = beg +
len;
218 long source_len = RSTRING_LEN(str);
219 return end == source_len || zero_filled(RSTRING_PTR(str) + end, TERM_LEN(str));
224str_embed_capa(
VALUE str)
226 return rb_gc_obj_slot_size(str) - offsetof(
struct RString, as.
embed.ary);
230rb_str_reembeddable_p(
VALUE str)
232 return !
FL_TEST(str, STR_NOFREE|STR_SHARED_ROOT|STR_SHARED);
236rb_str_embed_size(
long capa,
long termlen)
244rb_str_size_as_embedded(
VALUE str)
247 if (STR_EMBED_P(str)) {
249 if (
FL_TEST_RAW(str, STR_PRECOMPUTED_HASH))
capa +=
sizeof(st_index_t);
251 real_size = rb_str_embed_size(
capa, TERM_LEN(str));
255 else if (rb_str_reembeddable_p(str)) {
257 if (
FL_TEST_RAW(str, STR_PRECOMPUTED_HASH))
capa +=
sizeof(st_index_t);
259 real_size = rb_str_embed_size(
capa, TERM_LEN(str));
262 real_size =
sizeof(
struct RString);
269STR_EMBEDDABLE_P(
long len,
long termlen)
271 return rb_gc_size_allocatable_p(rb_str_embed_size(
len, termlen));
/* Forward declarations of file-local helpers that are referenced before
 * their definitions appear later in this file. */
276static VALUE str_new_frozen_buffer(
VALUE klass,
VALUE orig,
int copy_encoding);
277static VALUE str_new_static(
VALUE klass,
const char *
ptr,
long len,
int encindex);
279static void str_make_independent_expand(
VALUE str,
long len,
long expand,
const int termlen);
280static inline void str_modifiable(
VALUE str);
285str_make_independent(
VALUE str)
287 long len = RSTRING_LEN(str);
288 int termlen = TERM_LEN(str);
289 str_make_independent_expand((str),
len, 0L, termlen);
292static inline int str_dependent_p(
VALUE str);
295rb_str_make_independent(
VALUE str)
297 if (str_dependent_p(str)) {
298 str_make_independent(str);
303rb_str_make_embedded(
VALUE str)
308 int termlen = TERM_LEN(str);
309 char *buf =
RSTRING(str)->as.heap.ptr;
310 long old_capa =
RSTRING(str)->as.heap.aux.capa + termlen;
314 STR_SET_LEN(str,
len);
317 memcpy(RSTRING_PTR(str), buf,
len);
318 SIZED_FREE_N(buf, old_capa);
325rb_debug_rstring_null_ptr(
const char *func)
327 fprintf(stderr,
"%s is returning NULL!! "
328 "SIGSEGV is highly expected to follow immediately.\n"
329 "If you could reproduce, attach your debugger here, "
330 "and look at the passed string.\n",
335static VALUE sym_ascii, sym_turkic, sym_lithuanian, sym_fold;
338get_encoding(
VALUE str)
344mustnot_broken(
VALUE str)
346 if (is_broken_string(str)) {
347 rb_raise(rb_eArgError,
"invalid byte sequence in %s", rb_enc_name(STR_ENC_GET(str)));
352mustnot_wchar(
VALUE str)
355 if (rb_enc_mbminlen(enc) > 1) {
356 rb_raise(rb_eArgError,
"wide char encoding: %s", rb_enc_name(enc));
360static VALUE register_fstring(
VALUE str,
bool copy,
bool force_precompute_hash);
362#if SIZEOF_LONG == SIZEOF_VOIDP
363#define PRECOMPUTED_FAKESTR_HASH 1
368BARE_STRING_P(
VALUE str)
373static inline st_index_t
374str_do_hash(
VALUE str)
376 st_index_t h =
rb_memhash((
const void *)RSTRING_PTR(str), RSTRING_LEN(str));
378 if (e && !is_ascii_string(str)) {
385str_store_precomputed_hash(
VALUE str, st_index_t hash)
391 size_t used_bytes = (RSTRING_LEN(str) + TERM_LEN(str));
392 size_t free_bytes = str_embed_capa(str) - used_bytes;
396 memcpy(
RSTRING_END(str) + TERM_LEN(str), &hash,
sizeof(hash));
398 FL_SET(str, STR_PRECOMPUTED_HASH);
411 if (
FL_TEST(str, RSTRING_FSTR))
414 bare = BARE_STRING_P(str);
416 if (STR_EMBED_P(str)) {
421 if (
FL_TEST_RAW(str, STR_SHARED_ROOT | STR_SHARED) == STR_SHARED_ROOT) {
428 rb_str_resize(str, RSTRING_LEN(str));
430 fstr = register_fstring(str,
false,
false);
433 str_replace_shared_without_enc(str, fstr);
/* The fstring table object. Init_fstring_table() creates it with
 * rb_concurrent_set_new() and registers its address with the GC;
 * register_fstring() looks strings up in it / inserts them into it. */
440static VALUE fstring_table_obj;
443fstring_concurrent_set_hash(
VALUE str)
445#ifdef PRECOMPUTED_FAKESTR_HASH
449 h = (st_index_t)
RSTRING(str)->as.heap.aux.capa;
466 const char *aptr, *bptr;
473 return (alen == blen &&
475 memcmp(aptr, bptr, alen) == 0);
480 bool force_precompute_hash;
484fstring_concurrent_set_create(
VALUE str,
void *data)
494 long len = RSTRING_LEN(str);
495 long capa =
len +
sizeof(st_index_t);
496 int term_len = TERM_LEN(str);
498 if (arg->force_precompute_hash && STR_EMBEDDABLE_P(
capa, term_len)) {
500 memcpy(RSTRING_PTR(new_str), RSTRING_PTR(str),
len);
501 STR_SET_LEN(new_str, RSTRING_LEN(str));
503 rb_enc_copy(new_str, str);
504 str_store_precomputed_hash(new_str, str_do_hash(str));
508 rb_enc_copy(new_str, str);
509#ifdef PRECOMPUTED_FAKESTR_HASH
510 if (
rb_str_capacity(new_str) >= RSTRING_LEN(str) + term_len +
sizeof(st_index_t)) {
511 str_store_precomputed_hash(new_str, (st_index_t)
RSTRING(str)->as.heap.aux.capa);
525 if (!
OBJ_FROZEN(str) || CHILLED_STRING_P(str)) {
528 if (STR_SHARED_P(str)) {
530 str_make_independent(str);
533 if (!BARE_STRING_P(str)) {
539 RBASIC(str)->flags |= RSTRING_FSTR;
541 RB_OBJ_SET_SHAREABLE(str);
555 .hash = fstring_concurrent_set_hash,
556 .cmp = fstring_concurrent_set_cmp,
557 .create = fstring_concurrent_set_create,
562Init_fstring_table(
void)
564 fstring_table_obj = rb_concurrent_set_new(&fstring_concurrent_set_funcs, 8192);
565 rb_gc_register_address(&fstring_table_obj);
569register_fstring(
VALUE str,
bool copy,
bool force_precompute_hash)
573 .force_precompute_hash = force_precompute_hash
576#if SIZEOF_VOIDP == SIZEOF_LONG
580 RSTRING(str)->as.heap.aux.capa = (long)str_do_hash(str);
584 VALUE result = rb_concurrent_set_find_or_insert(&fstring_table_obj, str, &args);
586 RUBY_ASSERT(!rb_objspace_garbage_object_p(result));
598rb_obj_is_fstring_table(
VALUE obj)
602 return obj == fstring_table_obj;
606rb_gc_free_fstring(
VALUE obj)
608 ASSERT_vm_locking_with_barrier();
614 rb_concurrent_set_delete_by_identity(fstring_table_obj, obj);
616 RB_DEBUG_COUNTER_INC(obj_str_fstr);
622rb_fstring_foreach_with_replace(
int (*callback)(
VALUE *str,
void *data),
void *data)
624 if (fstring_table_obj) {
625 rb_concurrent_set_foreach_with_replace(fstring_table_obj, callback, data);
630setup_fake_str(
struct RString *fake_str,
const char *name,
long len,
int encidx)
633 RBASIC_SET_SHAPE_ID((
VALUE)fake_str, ROOT_SHAPE_ID);
646 return (
VALUE)fake_str;
655 return setup_fake_str(fake_str, name,
len, rb_enc_to_index(enc));
664rb_fstring_new(
const char *ptr,
long len)
666 struct RString fake_str = {RBASIC_INIT};
667 return register_fstring(setup_fake_str(&fake_str,
ptr,
len, ENCINDEX_US_ASCII),
false,
false);
673 struct RString fake_str = {RBASIC_INIT};
674 return register_fstring(rb_setup_fake_str(&fake_str,
ptr,
len, enc),
false,
false);
678rb_fstring_cstr(
const char *
ptr)
680 return rb_fstring_new(
ptr, strlen(
ptr));
684single_byte_optimizable(
VALUE str)
688 case ENCINDEX_ASCII_8BIT:
689 case ENCINDEX_US_ASCII:
711static inline const char *
712search_nonascii(
const char *p,
const char *e)
716#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)
717# if SIZEOF_UINTPTR_T == 8
718# define NONASCII_MASK UINT64_C(0x8080808080808080)
719# elif SIZEOF_UINTPTR_T == 4
720# define NONASCII_MASK UINT32_C(0x80808080)
722# error "don't know what to do."
725# if SIZEOF_UINTPTR_T == 8
726# define NONASCII_MASK ((uintptr_t)0x80808080UL << 32 | (uintptr_t)0x80808080UL)
727# elif SIZEOF_UINTPTR_T == 4
728# define NONASCII_MASK 0x80808080UL
730# error "don't know what to do."
734 if (UNALIGNED_WORD_ACCESS || e - p >= SIZEOF_VOIDP) {
735#if !UNALIGNED_WORD_ACCESS
736 if ((uintptr_t)p % SIZEOF_VOIDP) {
737 int l = SIZEOF_VOIDP - (uintptr_t)p % SIZEOF_VOIDP;
742 case 7:
if (p[-7]&0x80)
return p-7;
743 case 6:
if (p[-6]&0x80)
return p-6;
744 case 5:
if (p[-5]&0x80)
return p-5;
745 case 4:
if (p[-4]&0x80)
return p-4;
747 case 3:
if (p[-3]&0x80)
return p-3;
748 case 2:
if (p[-2]&0x80)
return p-2;
749 case 1:
if (p[-1]&0x80)
return p-1;
754#if defined(HAVE_BUILTIN___BUILTIN_ASSUME_ALIGNED) &&! UNALIGNED_WORD_ACCESS
755#define aligned_ptr(value) \
756 __builtin_assume_aligned((value), sizeof(uintptr_t))
758#define aligned_ptr(value) (value)
761 t = (e - (SIZEOF_VOIDP-1));
763 for (;s < t; s +=
sizeof(uintptr_t)) {
765 memcpy(&word, s,
sizeof(word));
766 if (word & NONASCII_MASK) {
767#ifdef WORDS_BIGENDIAN
768 return (
const char *)s + (nlz_intptr(word&NONASCII_MASK)>>3);
770 return (
const char *)s + (ntz_intptr(word&NONASCII_MASK)>>3);
780 case 7:
if (e[-7]&0x80)
return e-7;
781 case 6:
if (e[-6]&0x80)
return e-6;
782 case 5:
if (e[-5]&0x80)
return e-5;
783 case 4:
if (e[-4]&0x80)
return e-4;
785 case 3:
if (e[-3]&0x80)
return e-3;
786 case 2:
if (e[-2]&0x80)
return e-2;
787 case 1:
if (e[-1]&0x80)
return e-1;
795 const char *e = p +
len;
797 if (rb_enc_to_index(enc) == rb_ascii8bit_encindex()) {
799 p = search_nonascii(p, e);
803 if (rb_enc_asciicompat(enc)) {
804 p = search_nonascii(p, e);
807 int ret = rb_enc_precise_mbclen(p, e, enc);
811 p = search_nonascii(p, e);
817 int ret = rb_enc_precise_mbclen(p, e, enc);
833 if (rb_enc_to_index(enc) == rb_ascii8bit_encindex()) {
836 p = search_nonascii(p, e);
840 else if (rb_enc_asciicompat(enc)) {
841 p = search_nonascii(p, e);
847 int ret = rb_enc_precise_mbclen(p, e, enc);
854 p = search_nonascii(p, e);
860 int ret = rb_enc_precise_mbclen(p, e, enc);
885 rb_enc_set_index(str1, rb_enc_get_index(str2));
893rb_enc_cr_str_copy_for_substr(
VALUE dest,
VALUE src)
898 str_enc_copy(dest, src);
899 if (RSTRING_LEN(dest) == 0) {
900 if (!rb_enc_asciicompat(STR_ENC_GET(src)))
911 if (!rb_enc_asciicompat(STR_ENC_GET(src)) ||
912 search_nonascii(RSTRING_PTR(dest),
RSTRING_END(dest)))
923rb_enc_cr_str_exact_copy(
VALUE dest,
VALUE src)
925 str_enc_copy(dest, src);
932 return coderange_scan(RSTRING_PTR(str), RSTRING_LEN(str), enc);
938 return enc_coderange_scan(str, enc);
942rbimpl_enc_str_coderange_scan(
VALUE str)
944 int cr = enc_coderange_scan(str, get_encoding(str));
949#undef rb_enc_str_coderange
956 cr = rbimpl_enc_str_coderange_scan(str);
960#define rb_enc_str_coderange rb_enc_str_coderange_inline
963rb_enc_str_asciicompat(
VALUE str)
966 return rb_str_encindex_fastpath(encindex) || rb_enc_asciicompat(rb_enc_get_from_index(encindex));
974 return rb_enc_str_asciicompat(str) && is_ascii_string(str);
983str_mod_check(
VALUE s,
const char *p,
long len)
985 if (RSTRING_PTR(s) != p || RSTRING_LEN(s) !=
len){
991str_capacity(
VALUE str,
const int termlen)
993 if (STR_EMBED_P(str)) {
994 return str_embed_capa(str) - termlen;
996 else if (
FL_ANY_RAW(str, STR_SHARED|STR_NOFREE)) {
1000 return RSTRING(str)->as.heap.aux.capa;
1007 return str_capacity(str, TERM_LEN(str));
1011must_not_null(
const char *
ptr)
1014 rb_raise(rb_eArgError,
"NULL pointer given");
1019str_alloc_embed(
VALUE klass,
size_t capa)
1021 size_t size = rb_str_embed_size(
capa, 0);
1028 str->as.embed.ary[0] = 0;
1034str_alloc_heap(
VALUE klass)
1039 str->as.heap.aux.capa = 0;
1040 str->as.heap.ptr = NULL;
1046empty_str_alloc(
VALUE klass)
1048 RUBY_DTRACE_CREATE_HOOK(STRING, 0);
1049 VALUE str = str_alloc_embed(klass, 0);
1050 memset(
RSTRING(str)->
as.embed.ary, 0, str_embed_capa(str));
1061 rb_raise(rb_eArgError,
"negative string size (or size too big)");
1065 enc = rb_ascii8bit_encoding();
1068 RUBY_DTRACE_CREATE_HOOK(STRING,
len);
1070 int termlen = rb_enc_mbminlen(enc);
1072 if (STR_EMBEDDABLE_P(
len, termlen)) {
1073 str = str_alloc_embed(klass,
len + termlen);
1079 str = str_alloc_heap(klass);
1085 rb_xmalloc_mul_add_mul(
sizeof(
char),
len,
sizeof(
char), termlen);
1088 rb_enc_raw_set(str, enc);
1091 memcpy(RSTRING_PTR(str),
ptr,
len);
1094 memset(RSTRING_PTR(str), 0,
len);
1097 STR_SET_LEN(str,
len);
1098 TERM_FILL(RSTRING_PTR(str) +
len, termlen);
1105 return str_enc_new(klass,
ptr,
len, rb_ascii8bit_encoding());
1140 __msan_unpoison_string(
ptr);
1160 if (rb_enc_mbminlen(enc) != 1) {
1161 rb_raise(rb_eArgError,
"wchar encoding given");
1163 return rb_enc_str_new(
ptr, strlen(
ptr), enc);
1167str_new_static(
VALUE klass,
const char *
ptr,
long len,
int encindex)
1172 rb_raise(rb_eArgError,
"negative string size (or size too big)");
1176 str = str_enc_new(klass,
ptr,
len, rb_enc_from_index(encindex));
1179 RUBY_DTRACE_CREATE_HOOK(STRING,
len);
1180 str = str_alloc_heap(klass);
1184 RBASIC(str)->flags |= STR_NOFREE;
1185 rb_enc_associate_index(str, encindex);
1214static VALUE str_cat_conv_enc_opts(
VALUE newstr,
long ofs,
const char *
ptr,
long len,
1216 int ecflags,
VALUE ecopts);
1221 int encidx = rb_enc_to_index(enc);
1222 if (rb_enc_get_index(str) == encidx)
1223 return is_ascii_string(str);
1234 if (!to)
return str;
1235 if (!from) from = rb_enc_get(str);
1236 if (from == to)
return str;
1237 if ((rb_enc_asciicompat(to) && is_enc_ascii_string(str, from)) ||
1238 rb_is_ascii8bit_enc(to)) {
1239 if (STR_ENC_GET(str) != to) {
1241 rb_enc_associate(str, to);
1248 from, to, ecflags, ecopts);
1249 if (
NIL_P(newstr)) {
1257rb_str_cat_conv_enc_opts(
VALUE newstr,
long ofs,
const char *
ptr,
long len,
1262 olen = RSTRING_LEN(newstr);
1263 if (ofs < -olen || olen < ofs)
1265 if (ofs < 0) ofs += olen;
1267 STR_SET_LEN(newstr, ofs);
1271 rb_str_modify(newstr);
1272 return str_cat_conv_enc_opts(newstr, ofs,
ptr,
len, from,
1280 STR_SET_LEN(str, 0);
1281 rb_enc_associate(str, enc);
1287str_cat_conv_enc_opts(
VALUE newstr,
long ofs,
const char *
ptr,
long len,
1289 int ecflags,
VALUE ecopts)
1294 VALUE econv_wrapper;
1295 const unsigned char *start, *sp;
1296 unsigned char *dest, *dp;
1297 size_t converted_output = (size_t)ofs;
1302 RBASIC_CLEAR_CLASS(econv_wrapper);
1304 if (!ec)
return Qnil;
1307 sp = (
unsigned char*)
ptr;
1309 while ((dest = (
unsigned char*)RSTRING_PTR(newstr)),
1310 (dp = dest + converted_output),
1314 size_t converted_input = sp - start;
1315 size_t rest =
len - converted_input;
1316 converted_output = dp - dest;
1318 if (converted_input && converted_output &&
1319 rest < (LONG_MAX / converted_output)) {
1320 rest = (rest * converted_output) / converted_input;
1325 olen += rest < 2 ? 2 : rest;
1326 rb_str_resize(newstr, olen);
1333 len = dp - (
unsigned char*)RSTRING_PTR(newstr);
1335 rb_enc_associate(newstr, to);
1354 const int eidx = rb_enc_to_index(eenc);
1357 return rb_enc_str_new(
ptr,
len, eenc);
1361 if ((eidx == rb_ascii8bit_encindex()) ||
1362 (eidx == rb_usascii_encindex() && search_nonascii(
ptr,
ptr +
len))) {
1366 ienc = rb_default_internal_encoding();
1367 if (!ienc || eenc == ienc) {
1368 return rb_enc_str_new(
ptr,
len, eenc);
1372 if ((eidx == rb_ascii8bit_encindex()) ||
1373 (eidx == rb_usascii_encindex()) ||
1374 (rb_enc_asciicompat(eenc) && !search_nonascii(
ptr,
ptr +
len))) {
1375 return rb_enc_str_new(
ptr,
len, ienc);
1378 str = rb_enc_str_new(NULL, 0, ienc);
1381 if (
NIL_P(rb_str_cat_conv_enc_opts(str, 0,
ptr,
len, eenc, 0,
Qnil))) {
1382 rb_str_initialize(str,
ptr,
len, eenc);
1390 int eidx = rb_enc_to_index(eenc);
1391 if (eidx == rb_usascii_encindex() &&
1392 !is_ascii_string(str)) {
1393 rb_enc_associate_index(str, rb_ascii8bit_encindex());
1396 rb_enc_associate_index(str, eidx);
1455str_replace_shared_without_enc(
VALUE str2,
VALUE str)
1457 const int termlen = TERM_LEN(str);
1462 if (str_embed_capa(str2) >=
len + termlen) {
1463 char *ptr2 =
RSTRING(str2)->as.embed.ary;
1464 STR_SET_EMBED(str2);
1465 memcpy(ptr2, RSTRING_PTR(str),
len);
1466 TERM_FILL(ptr2+
len, termlen);
1470 if (STR_SHARED_P(str)) {
1471 root =
RSTRING(str)->as.heap.aux.shared;
1480 if (!STR_EMBED_P(str2) && !
FL_TEST_RAW(str2, STR_SHARED|STR_NOFREE)) {
1482 rb_fatal(
"about to free a possible shared root");
1484 char *ptr2 = STR_HEAP_PTR(str2);
1486 SIZED_FREE_N(ptr2, STR_HEAP_SIZE(str2));
1489 FL_SET(str2, STR_NOEMBED);
1491 STR_SET_SHARED(str2, root);
1494 STR_SET_LEN(str2,
len);
1502 str_replace_shared_without_enc(str2, str);
1503 rb_enc_cr_str_exact_copy(str2, str);
1510 return str_replace_shared(str_alloc_heap(klass), str);
1527rb_str_new_frozen_String(
VALUE orig)
1535rb_str_frozen_bare_string(
VALUE orig)
1537 if (RB_LIKELY(BARE_STRING_P(orig) &&
OBJ_FROZEN_RAW(orig)))
return orig;
1542rb_str_tmp_frozen_acquire(
VALUE orig)
1545 return str_new_frozen_buffer(0, orig, FALSE);
1549rb_str_tmp_frozen_no_embed_acquire(
VALUE orig)
1551 if (
OBJ_FROZEN_RAW(orig) && !STR_EMBED_P(orig) && !rb_str_reembeddable_p(orig))
return orig;
1552 if (STR_SHARED_P(orig) && !STR_EMBED_P(
RSTRING(orig)->
as.heap.aux.shared))
return rb_str_tmp_frozen_acquire(orig);
1554 VALUE str = str_alloc_heap(0);
1557 FL_SET(str, STR_SHARED_ROOT);
1559 size_t capa = str_capacity(orig, TERM_LEN(orig));
1565 if (STR_EMBED_P(orig) ||
FL_TEST_RAW(orig, STR_SHARED | STR_SHARED_ROOT | RSTRING_FSTR)) {
1566 RSTRING(str)->as.heap.ptr = rb_xmalloc_mul_add_mul(
sizeof(
char),
capa,
sizeof(
char), TERM_LEN(orig));
1573 RBASIC(str)->flags |=
RBASIC(orig)->flags & STR_NOFREE;
1574 RBASIC(orig)->flags &= ~STR_NOFREE;
1575 STR_SET_SHARED(orig, str);
1577 RB_OBJ_SET_SHAREABLE(str);
1583 RSTRING(str)->as.heap.aux.capa =
capa + (TERM_LEN(orig) - TERM_LEN(str));
1589rb_str_tmp_frozen_release(
VALUE orig,
VALUE tmp)
1594 if (STR_EMBED_P(tmp)) {
1597 else if (
FL_TEST_RAW(orig, STR_SHARED | STR_TMPLOCK) == STR_TMPLOCK &&
1603 RUBY_ASSERT(RSTRING_LEN(orig) == RSTRING_LEN(tmp));
1607 RSTRING(orig)->as.heap.aux.capa =
RSTRING(tmp)->as.heap.aux.capa;
1608 RBASIC(orig)->flags |=
RBASIC(tmp)->flags & STR_NOFREE;
1613 STR_SET_LEN(tmp, 0);
1621 return str_new_frozen_buffer(klass, orig, TRUE);
1631 VALUE str = str_alloc_heap(klass);
1632 STR_SET_LEN(str, RSTRING_LEN(orig));
1633 RSTRING(str)->as.heap.ptr = RSTRING_PTR(orig);
1634 RSTRING(str)->as.heap.aux.capa =
RSTRING(orig)->as.heap.aux.capa;
1635 RBASIC(str)->flags |=
RBASIC(orig)->flags & STR_NOFREE;
1636 RBASIC(orig)->flags &= ~STR_NOFREE;
1637 STR_SET_SHARED(orig, str);
1644str_new_frozen_buffer(
VALUE klass,
VALUE orig,
int copy_encoding)
1648 long len = RSTRING_LEN(orig);
1649 rb_encoding *enc = copy_encoding ? STR_ENC_GET(orig) : rb_ascii8bit_encoding();
1650 int termlen = copy_encoding ? TERM_LEN(orig) : 1;
1652 if (STR_EMBED_P(orig) || STR_EMBEDDABLE_P(
len, termlen)) {
1653 str = str_enc_new(klass, RSTRING_PTR(orig),
len, enc);
1659 long ofs =
RSTRING(orig)->as.heap.ptr - RSTRING_PTR(
shared);
1660 long rest = RSTRING_LEN(
shared) - ofs - RSTRING_LEN(orig);
1666 if ((ofs > 0) || (rest > 0) ||
1669 str = str_new_shared(klass,
shared);
1671 RSTRING(str)->as.heap.ptr += ofs;
1672 STR_SET_LEN(str, RSTRING_LEN(str) - (ofs + rest));
1680 else if (STR_EMBEDDABLE_P(RSTRING_LEN(orig), TERM_LEN(orig))) {
1681 str = str_alloc_embed(klass, RSTRING_LEN(orig) + TERM_LEN(orig));
1683 memcpy(RSTRING_PTR(str), RSTRING_PTR(orig), RSTRING_LEN(orig));
1684 STR_SET_LEN(str, RSTRING_LEN(orig));
1690 str = str_new(klass, RSTRING_PTR(orig), RSTRING_LEN(orig));
1693 str = heap_str_make_shared(klass, orig);
1698 if (copy_encoding) rb_enc_cr_str_exact_copy(str, orig);
1710str_new_empty_String(
VALUE str)
1713 rb_enc_copy(v, str);
/* Minimum buffer capacity: a requested `capa` below this value is raised to
 * it where this constant is applied later in the file. */
1717#define STR_BUF_MIN_SIZE 63
1722 if (STR_EMBEDDABLE_P(
capa, 1)) {
1730 RSTRING(str)->as.heap.ptr[0] =
'\0';
1750 return str_new(0, 0,
len);
1756 if (STR_EMBED_P(str)) {
1757 RB_DEBUG_COUNTER_INC(obj_str_embed);
1759 else if (
FL_TEST(str, STR_SHARED | STR_NOFREE)) {
1760 (void)RB_DEBUG_COUNTER_INC_IF(obj_str_shared,
FL_TEST(str, STR_SHARED));
1761 (void)RB_DEBUG_COUNTER_INC_IF(obj_str_shared,
FL_TEST(str, STR_NOFREE));
1764 RB_DEBUG_COUNTER_INC(obj_str_ptr);
1765 SIZED_FREE_N(STR_HEAP_PTR(str), STR_HEAP_SIZE(str));
1770rb_str_memsize(
VALUE str)
1772 if (
FL_TEST(str, STR_NOEMBED|STR_SHARED|STR_NOFREE) == STR_NOEMBED) {
1773 return STR_HEAP_SIZE(str);
1783 return rb_convert_type_with_id(str,
T_STRING,
"String", idTo_str);
/* Forward declarations of file-local helpers used before their definitions. */
1786static inline void str_discard(
VALUE str);
1787static void str_shared_replace(
VALUE str,
VALUE str2);
1792 if (str != str2) str_shared_replace(str, str2);
1803 enc = STR_ENC_GET(str2);
1806 termlen = rb_enc_mbminlen(enc);
1808 STR_SET_LEN(str, RSTRING_LEN(str2));
1810 if (str_embed_capa(str) >= RSTRING_LEN(str2) + termlen) {
1812 memcpy(RSTRING_PTR(str), RSTRING_PTR(str2), (
size_t)RSTRING_LEN(str2) + termlen);
1813 rb_enc_associate(str, enc);
1817 if (STR_EMBED_P(str2)) {
1819 long len = RSTRING_LEN(str2);
1822 char *new_ptr =
ALLOC_N(
char,
len + termlen);
1823 memcpy(new_ptr,
RSTRING(str2)->
as.embed.ary,
len + termlen);
1824 RSTRING(str2)->as.heap.ptr = new_ptr;
1825 STR_SET_LEN(str2,
len);
1827 STR_SET_NOEMBED(str2);
1830 STR_SET_NOEMBED(str);
1832 RSTRING(str)->as.heap.ptr = RSTRING_PTR(str2);
1834 if (
FL_TEST(str2, STR_SHARED)) {
1836 STR_SET_SHARED(str,
shared);
1839 RSTRING(str)->as.heap.aux.capa =
RSTRING(str2)->as.heap.aux.capa;
1843 STR_SET_EMBED(str2);
1844 RSTRING_PTR(str2)[0] = 0;
1845 STR_SET_LEN(str2, 0);
1846 rb_enc_associate(str, enc);
1860 return rb_obj_as_string_result(str, obj);
1876 len = RSTRING_LEN(str2);
1877 if (STR_SHARED_P(str2)) {
1880 STR_SET_NOEMBED(str);
1881 STR_SET_LEN(str,
len);
1882 RSTRING(str)->as.heap.ptr = RSTRING_PTR(str2);
1883 STR_SET_SHARED(str,
shared);
1884 rb_enc_cr_str_exact_copy(str, str2);
1887 str_replace_shared(str, str2);
1896 size_t size = rb_str_embed_size(
capa, 0);
1912 str->as.heap.aux.capa = 0;
1913 str->as.heap.ptr = NULL;
1923 encidx = rb_enc_get_index(str);
1924 flags &= ~ENCODING_MASK;
1927 if (encidx) rb_enc_associate_index(dup, encidx);
1937 long len = RSTRING_LEN(str);
1942 STR_SET_LEN(dup, RSTRING_LEN(str));
1943 return str_duplicate_setup_encoding(str, dup, flags);
1952 root =
RSTRING(str)->as.heap.aux.shared;
1955 root = str = str_new_frozen(klass, str);
1961 RSTRING(dup)->as.heap.ptr = RSTRING_PTR(str);
1963 STR_SET_SHARED(dup, root);
1964 flags |= RSTRING_NOEMBED | STR_SHARED;
1966 STR_SET_LEN(dup, RSTRING_LEN(str));
1967 return str_duplicate_setup_encoding(str, dup, flags);
1973 if (STR_EMBED_P(str)) {
1974 return str_duplicate_setup_embed(klass, str, dup);
1977 return str_duplicate_setup_heap(klass, str, dup);
1985 if (STR_EMBED_P(str)) {
1986 dup = str_alloc_embed(klass, RSTRING_LEN(str) + TERM_LEN(str));
1989 dup = str_alloc_heap(klass);
1992 return str_duplicate_setup(klass, str, dup);
2003rb_str_dup_m(
VALUE str)
2005 if (LIKELY(BARE_STRING_P(str))) {
2016 RUBY_DTRACE_CREATE_HOOK(STRING, RSTRING_LEN(str));
2023 RUBY_DTRACE_CREATE_HOOK(STRING, RSTRING_LEN(str));
2027 new_str = ec_str_alloc_embed(ec, klass, RSTRING_LEN(str) + TERM_LEN(str));
2028 str_duplicate_setup_embed(klass, str, new_str);
2031 new_str = ec_str_alloc_heap(ec, klass);
2032 str_duplicate_setup_heap(klass, str, new_str);
2041rb_str_with_debug_created_info(
VALUE str,
VALUE path,
int line)
2043 VALUE debug_info = rb_ary_new_from_args(2, path,
INT2FIX(line));
2067 static ID keyword_ids[2];
2068 VALUE orig, opt, venc, vcapa;
2073 if (!keyword_ids[0]) {
2074 keyword_ids[0] = rb_id_encoding();
2075 CONST_ID(keyword_ids[1],
"capacity");
2083 if (!UNDEF_P(venc) && !
NIL_P(venc)) {
2084 enc = rb_to_encoding(venc);
2086 if (!UNDEF_P(vcapa) && !
NIL_P(vcapa)) {
2089 int termlen = enc ? rb_enc_mbminlen(enc) : 1;
2091 if (
capa < STR_BUF_MIN_SIZE) {
2092 capa = STR_BUF_MIN_SIZE;
2096 len = RSTRING_LEN(orig);
2100 if (orig == str) n = 0;
2102 str_modifiable(str);
2103 if (STR_EMBED_P(str) ||
FL_TEST(str, STR_SHARED|STR_NOFREE)) {
2105 const size_t size = (size_t)
capa + termlen;
2106 const char *
const old_ptr = RSTRING_PTR(str);
2107 const size_t osize = RSTRING_LEN(str) + TERM_LEN(str);
2108 char *new_ptr =
ALLOC_N(
char, size);
2109 if (STR_EMBED_P(str))
RUBY_ASSERT((
long)osize <= str_embed_capa(str));
2110 memcpy(new_ptr, old_ptr, osize < size ? osize : size);
2112 RSTRING(str)->as.heap.ptr = new_ptr;
2114 else if (STR_HEAP_SIZE(str) != (
size_t)
capa + termlen) {
2115 SIZED_REALLOC_N(
RSTRING(str)->
as.heap.ptr,
char,
2116 (
size_t)
capa + termlen, STR_HEAP_SIZE(str));
2118 STR_SET_LEN(str,
len);
2121 memcpy(
RSTRING(str)->
as.heap.ptr, RSTRING_PTR(orig),
len);
2122 rb_enc_cr_str_exact_copy(str, orig);
2124 FL_SET(str, STR_NOEMBED);
2131 rb_enc_associate(str, enc);
2143rb_str_s_new(
int argc,
VALUE *argv,
VALUE klass)
2149 static ID keyword_ids[2];
2159 keyword_ids[0] = rb_id_encoding();
2160 CONST_ID(keyword_ids[1],
"capacity");
2162 encoding = kwargs[0];
2163 capacity = kwargs[1];
2172 if (UNDEF_P(encoding)) {
2174 encoding = rb_obj_encoding(orig);
2178 if (!UNDEF_P(encoding)) {
2179 enc = rb_to_encoding(encoding);
2183 if (UNDEF_P(capacity)) {
2185 VALUE empty_str = str_new(klass,
"", 0);
2187 rb_enc_associate(empty_str, enc);
2191 VALUE copy = str_duplicate(klass, orig);
2192 rb_enc_associate(copy, enc);
2205 if (orig_capa >
capa) {
2210 VALUE str = str_enc_new(klass, NULL,
capa, enc);
2211 STR_SET_LEN(str, 0);
/* True unless the top two bits of `c` are `10`, i.e. for every byte that is
 * not in the range 0x80..0xBF. enc_strlen() counts the bytes for which this
 * is true. */
2222#define is_utf8_lead_byte(c) (((c)&0xC0) != 0x80)
2237static inline uintptr_t
2238count_utf8_lead_bytes_with_word(
const uintptr_t *s)
2243 d = (d>>6) | (~d>>7);
2244 d &= NONASCII_MASK >> 7;
2247#if defined(HAVE_BUILTIN___BUILTIN_POPCOUNT) && defined(__POPCNT__)
2249 return rb_popcount_intptr(d);
2253# if SIZEOF_VOIDP == 8
2262enc_strlen(
const char *p,
const char *e,
rb_encoding *enc,
int cr)
2268 long diff = (long)(e - p);
2269 return diff / rb_enc_mbminlen(enc) + !!(diff % rb_enc_mbminlen(enc));
2274 if ((
int)
sizeof(uintptr_t) * 2 < e - p) {
2275 const uintptr_t *s, *t;
2276 const uintptr_t lowbits =
sizeof(uintptr_t) - 1;
2277 s = (
const uintptr_t*)(~lowbits & ((uintptr_t)p + lowbits));
2278 t = (
const uintptr_t*)(~lowbits & (uintptr_t)e);
2279 while (p < (
const char *)s) {
2280 if (is_utf8_lead_byte(*p))
len++;
2284 len += count_utf8_lead_bytes_with_word(s);
2287 p = (
const char *)s;
2290 if (is_utf8_lead_byte(*p))
len++;
2296 else if (rb_enc_asciicompat(enc)) {
2301 q = search_nonascii(p, e);
2307 p += rb_enc_fast_mbclen(p, e, enc);
2314 q = search_nonascii(p, e);
2320 p += rb_enc_mbclen(p, e, enc);
2327 for (c=0; p<e; c++) {
2328 p += rb_enc_mbclen(p, e, enc);
2343rb_enc_strlen_cr(
const char *p,
const char *e,
rb_encoding *enc,
int *cr)
2351 long diff = (long)(e - p);
2352 return diff / rb_enc_mbminlen(enc) + !!(diff % rb_enc_mbminlen(enc));
2354 else if (rb_enc_asciicompat(enc)) {
2358 q = search_nonascii(p, e);
2366 ret = rb_enc_precise_mbclen(p, e, enc);
2381 for (c=0; p<e; c++) {
2382 ret = rb_enc_precise_mbclen(p, e, enc);
2389 if (p + rb_enc_mbminlen(enc) <= e)
2390 p += rb_enc_mbminlen(enc);
2406 if (single_byte_optimizable(str))
return RSTRING_LEN(str);
2407 if (!enc) enc = STR_ENC_GET(str);
2408 p = RSTRING_PTR(str);
2413 long n = rb_enc_strlen_cr(p, e, enc, &cr);
2418 return enc_strlen(p, e, enc, cr);
2425 return str_strlen(str, NULL);
2439 return LONG2NUM(str_strlen(str, NULL));
2451rb_str_bytesize(
VALUE str)
2470rb_str_empty(
VALUE str)
2472 return RBOOL(RSTRING_LEN(str) == 0);
2491 char *ptr1, *ptr2, *ptr3;
2496 enc = rb_enc_check_str(str1, str2);
2499 termlen = rb_enc_mbminlen(enc);
2500 if (len1 > LONG_MAX - len2) {
2501 rb_raise(rb_eArgError,
"string size too big");
2503 str3 = str_enc_new(
rb_cString, 0, len1+len2, enc);
2504 ptr3 = RSTRING_PTR(str3);
2505 memcpy(ptr3, ptr1, len1);
2506 memcpy(ptr3+len1, ptr2, len2);
2507 TERM_FILL(&ptr3[len1+len2], termlen);
2523 MAYBE_UNUSED(
char) *ptr1, *ptr2;
2526 int enc1 = rb_enc_get_index(str1);
2527 int enc2 = rb_enc_get_index(str2);
2532 else if (enc2 < 0) {
2535 else if (enc1 != enc2) {
2538 else if (len1 > LONG_MAX - len2) {
2572 rb_enc_copy(str2, str);
2577 rb_raise(rb_eArgError,
"negative argument");
2579 if (RSTRING_LEN(str) == 1 && RSTRING_PTR(str)[0] == 0) {
2580 if (STR_EMBEDDABLE_P(
len, 1)) {
2582 memset(RSTRING_PTR(str2), 0,
len + 1);
2589 STR_SET_LEN(str2,
len);
2590 rb_enc_copy(str2, str);
2593 if (
len && LONG_MAX/
len < RSTRING_LEN(str)) {
2594 rb_raise(rb_eArgError,
"argument too big");
2597 len *= RSTRING_LEN(str);
2598 termlen = TERM_LEN(str);
2600 ptr2 = RSTRING_PTR(str2);
2602 n = RSTRING_LEN(str);
2603 memcpy(ptr2, RSTRING_PTR(str), n);
2604 while (n <=
len/2) {
2605 memcpy(ptr2 + n, ptr2, n);
2608 memcpy(ptr2 + n, ptr2,
len-n);
2610 STR_SET_LEN(str2,
len);
2611 TERM_FILL(&ptr2[
len], termlen);
2612 rb_enc_cr_str_copy_for_substr(str2, str);
2651rb_check_lockedtmp(
VALUE str)
2653 if (
FL_TEST(str, STR_TMPLOCK)) {
/* Flags that str_modifiable() checks with one FL_ANY_RAW test before taking
 * the slower chilled / tmp-locked / frozen handling path. */
2660#define STR_UNMODIFIABLE_MASK (FL_FREEZE | STR_TMPLOCK | STR_CHILLED)
2662str_modifiable(
VALUE str)
2666 if (RB_UNLIKELY(
FL_ANY_RAW(str, STR_UNMODIFIABLE_MASK))) {
2667 if (CHILLED_STRING_P(str)) {
2668 CHILLED_STRING_MUTATED(str);
2670 rb_check_lockedtmp(str);
2671 rb_check_frozen(str);
2676str_dependent_p(
VALUE str)
2678 if (STR_EMBED_P(str) || !
FL_TEST(str, STR_SHARED|STR_NOFREE)) {
/* STR_UNMODIFIABLE_MASK plus STR_SHARED and STR_NOFREE. str_independent()
 * tests this mask first and only calls str_modifiable() / str_dependent_p()
 * when at least one of these flags is set. */
2688#define STR_DEPENDANT_MASK (STR_UNMODIFIABLE_MASK | STR_SHARED | STR_NOFREE)
2690str_independent(
VALUE str)
2694 if (RB_UNLIKELY(
FL_ANY_RAW(str, STR_DEPENDANT_MASK))) {
2695 str_modifiable(str);
2696 return !str_dependent_p(str);
2702str_make_independent_expand(
VALUE str,
long len,
long expand,
const int termlen)
2712 if (!STR_EMBED_P(str) && str_embed_capa(str) >=
capa + termlen) {
2717 STR_SET_LEN(str,
len);
2722 oldptr = RSTRING_PTR(str);
2724 memcpy(
ptr, oldptr,
len);
2726 if (
FL_TEST_RAW(str, STR_NOEMBED|STR_NOFREE|STR_SHARED) == STR_NOEMBED) {
2727 SIZED_FREE_N(oldptr, STR_HEAP_SIZE(str));
2729 STR_SET_NOEMBED(str);
2730 FL_UNSET(str, STR_SHARED|STR_NOFREE);
2731 TERM_FILL(
ptr +
len, termlen);
2733 STR_SET_LEN(str,
len);
2740 if (!str_independent(str))
2741 str_make_independent(str);
2750 int termlen = TERM_LEN(str);
2751 long len = RSTRING_LEN(str);
2754 rb_raise(rb_eArgError,
"negative expanding string size");
2756 if (expand >= LONG_MAX -
len) {
2757 rb_raise(rb_eArgError,
"string size too big");
2760 if (!str_independent(str)) {
2761 str_make_independent_expand(str,
len, expand, termlen);
2763 else if (expand > 0) {
2764 RESIZE_CAPA_TERM(str,
len + expand, termlen);
2771str_modify_keep_cr(
VALUE str)
2773 if (!str_independent(str))
2774 str_make_independent(str);
2781str_discard(
VALUE str)
2783 str_modifiable(str);
2784 if (!STR_EMBED_P(str) && !
FL_TEST(str, STR_SHARED|STR_NOFREE)) {
2785 SIZED_FREE_N(STR_HEAP_PTR(str), STR_HEAP_SIZE(str));
2786 RSTRING(str)->as.heap.ptr = 0;
2787 STR_SET_LEN(str, 0);
2794 int encindex = rb_enc_get_index(str);
2796 if (RB_UNLIKELY(encindex == -1)) {
2800 if (RB_LIKELY(rb_str_encindex_fastpath(encindex))) {
2805 if (!rb_enc_asciicompat(enc)) {
2827 return RSTRING_PTR(str);
2831str_null_char(
const char *s,
long len,
const int minlen,
rb_encoding *enc)
2833 const char *e = s +
len;
2835 for (; s + minlen <= e; s += rb_enc_mbclen(s, e, enc)) {
2836 if (zero_filled(s, minlen))
return s;
2842str_fill_term(
VALUE str,
char *s,
long len,
int termlen)
2847 if (str_dependent_p(str)) {
2848 if (!zero_filled(s +
len, termlen))
2849 str_make_independent_expand(str,
len, 0L, termlen);
2852 TERM_FILL(s +
len, termlen);
2855 return RSTRING_PTR(str);
2859rb_str_change_terminator_length(
VALUE str,
const int oldtermlen,
const int termlen)
2861 long capa = str_capacity(str, oldtermlen) + oldtermlen;
2862 long len = RSTRING_LEN(str);
2866 rb_check_lockedtmp(str);
2867 str_make_independent_expand(str,
len, 0L, termlen);
2869 else if (str_dependent_p(str)) {
2870 if (termlen > oldtermlen)
2871 str_make_independent_expand(str,
len, 0L, termlen);
2874 if (!STR_EMBED_P(str)) {
2879 if (termlen > oldtermlen) {
2880 TERM_FILL(RSTRING_PTR(str) +
len, termlen);
2888str_null_check(
VALUE str,
int *w)
2890 char *s = RSTRING_PTR(str);
2891 long len = RSTRING_LEN(str);
2894 if (RB_UNLIKELY(!rb_str_enc_fastpath(str))) {
2896 minlen = rb_enc_mbminlen(enc);
2900 if (str_null_char(s,
len, minlen, enc)) {
2903 return str_fill_term(str, s,
len, minlen);
2908 if (!s || memchr(s, 0,
len)) {
2912 s = str_fill_term(str, s,
len, minlen);
2918rb_str_null_check(
VALUE str)
2926 if (RB_LIKELY(rb_str_enc_fastpath(str))) {
2927 if (!s || memchr(s, 0,
len)) {
2928 rb_raise(rb_eArgError,
"string contains null byte");
2933 const char *s = str_null_check(str, &w);
2936 rb_raise(rb_eArgError,
"string contains null char");
2938 rb_raise(rb_eArgError,
"string contains null byte");
2946rb_str_to_cstr(
VALUE str)
2949 return str_null_check(str, &w);
2957 char *s = str_null_check(str, &w);
2960 rb_raise(rb_eArgError,
"string contains null char");
2962 rb_raise(rb_eArgError,
"string contains null byte");
2968rb_str_fill_terminator(
VALUE str,
const int newminlen)
2970 char *s = RSTRING_PTR(str);
2971 long len = RSTRING_LEN(str);
2972 return str_fill_term(str, s,
len, newminlen);
2978 str = rb_check_convert_type_with_id(str,
T_STRING,
"String", idTo_str);
3004str_nth_len(
const char *p,
const char *e,
long *nthp,
rb_encoding *enc)
3013 else if (rb_enc_asciicompat(enc)) {
3014 const char *p2, *e2;
3017 while (p < e && 0 < nth) {
3024 p2 = search_nonascii(p, e2);
3033 n = rb_enc_mbclen(p, e, enc);
3044 while (p < e && nth--) {
3045 p += rb_enc_mbclen(p, e, enc);
3056 return str_nth_len(p, e, &nth, enc);
3060str_nth(
const char *p,
const char *e,
long nth,
rb_encoding *enc,
int singlebyte)
3065 p = str_nth_len(p, e, &nth, enc);
3074str_offset(
const char *p,
const char *e,
long nth,
rb_encoding *enc,
int singlebyte)
3076 const char *pp = str_nth(p, e, nth, enc, singlebyte);
3077 if (!pp)
return e - p;
3084 return str_offset(RSTRING_PTR(str),
RSTRING_END(str), pos,
3085 STR_ENC_GET(str), single_byte_optimizable(str));
3090str_utf8_nth(
const char *p,
const char *e,
long *nthp)
3093 if ((
int)SIZEOF_VOIDP * 2 < e - p && (
int)SIZEOF_VOIDP * 2 < nth) {
3094 const uintptr_t *s, *t;
3095 const uintptr_t lowbits = SIZEOF_VOIDP - 1;
3096 s = (
const uintptr_t*)(~lowbits & ((uintptr_t)p + lowbits));
3097 t = (
const uintptr_t*)(~lowbits & (uintptr_t)e);
3098 while (p < (
const char *)s) {
3099 if (is_utf8_lead_byte(*p)) nth--;
3103 nth -= count_utf8_lead_bytes_with_word(s);
3105 }
while (s < t && (
int)SIZEOF_VOIDP <= nth);
3109 if (is_utf8_lead_byte(*p)) {
3110 if (nth == 0)
break;
3120str_utf8_offset(
const char *p,
const char *e,
long nth)
3122 const char *pp = str_utf8_nth(p, e, &nth);
3131 if (single_byte_optimizable(str) || pos < 0)
3134 char *p = RSTRING_PTR(str);
3135 return enc_strlen(p, p + pos, STR_ENC_GET(str),
ENC_CODERANGE(str));
3140str_subseq(
VALUE str,
long beg,
long len)
3148 const int termlen = TERM_LEN(str);
3149 if (!SHARABLE_SUBSTRING_P(str, beg,
len)) {
3150 str2 = rb_enc_str_new(RSTRING_PTR(str) + beg,
len, rb_str_enc_get(str));
3159 if (str_embed_capa(str2) >=
len + termlen) {
3160 char *ptr2 =
RSTRING(str2)->as.embed.ary;
3161 STR_SET_EMBED(str2);
3162 memcpy(ptr2, RSTRING_PTR(str) + beg,
len);
3163 TERM_FILL(ptr2+
len, termlen);
3165 STR_SET_LEN(str2,
len);
3173 str_replace_shared(str2, str);
3179 RSTRING(str2)->as.heap.ptr += beg;
3180 if (RSTRING_LEN(str2) >
len) {
3181 STR_SET_LEN(str2,
len);
3191 VALUE str2 = str_subseq(str, beg,
len);
3192 rb_enc_cr_str_copy_for_substr(str2, str);
3201 const long blen = RSTRING_LEN(str);
3203 char *p, *s = RSTRING_PTR(str), *e = s + blen;
3205 if (
len < 0)
return 0;
3206 if (beg < 0 && -beg < 0)
return 0;
3210 if (single_byte_optimizable(str)) {
3211 if (beg > blen)
return 0;
3214 if (beg < 0)
return 0;
3216 if (
len > blen - beg)
3218 if (
len < 0)
return 0;
3223 if (
len > -beg)
len = -beg;
3227 while (beg-- >
len && (e = rb_enc_prev_char(s, e, e, enc)) != 0);
3230 while (
len-- > 0 && (p = rb_enc_prev_char(s, p, e, enc)) != 0);
3236 slen = str_strlen(str, enc);
3238 if (beg < 0)
return 0;
3240 if (
len == 0)
goto end;
3243 else if (beg > 0 && beg > blen) {
3247 if (beg > str_strlen(str, enc))
return 0;
3252 enc == rb_utf8_encoding()) {
3253 p = str_utf8_nth(s, e, &beg);
3254 if (beg > 0)
return 0;
3255 len = str_utf8_offset(p, e,
len);
3261 p = s + beg * char_sz;
3265 else if (
len * char_sz > e - p)
3270 else if ((p = str_nth_len(s, e, &beg, enc)) == e) {
3271 if (beg > 0)
return 0;
3275 len = str_offset(p, e,
len, enc, 0);
3283static VALUE str_substr(
VALUE str,
long beg,
long len,
int empty);
3288 return str_substr(str, beg,
len, TRUE);
3298str_substr(
VALUE str,
long beg,
long len,
int empty)
3302 if (!p)
return Qnil;
3303 if (!
len && !empty)
return Qnil;
3305 beg = p - RSTRING_PTR(str);
3307 VALUE str2 = str_subseq(str, beg,
len);
3308 rb_enc_cr_str_copy_for_substr(str2, str);
3316 if (CHILLED_STRING_P(str)) {
3321 rb_str_resize(str, RSTRING_LEN(str));
3339 if (
OBJ_FROZEN(str) || CHILLED_STRING_P(str)) {
3382str_uminus(
VALUE str)
3387 return rb_fstring(str);
/* Makes rb_str_dup_frozen expand to rb_str_new_frozen from this point on
 * (any earlier macro of this name was #undef'd near the top of the file). */
3391#define rb_str_dup_frozen rb_str_new_frozen
3396 rb_check_frozen(str);
3397 if (
FL_TEST(str, STR_TMPLOCK)) {
3400 FL_SET(str, STR_TMPLOCK);
3407 rb_check_frozen(str);
3408 if (!
FL_TEST(str, STR_TMPLOCK)) {
3428 const int termlen = TERM_LEN(str);
3430 str_modifiable(str);
3431 if (STR_SHARED_P(str)) {
3434 if (
len > (
capa = (
long)str_capacity(str, termlen)) ||
len < 0) {
3435 rb_bug(
"probable buffer overflow: %ld for %ld",
len,
capa);
3446 else if (
len > RSTRING_LEN(str)) {
3450 const char *
const new_end = RSTRING_PTR(str) +
len;
3460 else if (
len < RSTRING_LEN(str)) {
3468 STR_SET_LEN(str,
len);
3469 TERM_FILL(&RSTRING_PTR(str)[
len], termlen);
3476 rb_raise(rb_eArgError,
"negative string size (or size too big)");
3479 int independent = str_independent(str);
3480 long slen = RSTRING_LEN(str);
3481 const int termlen = TERM_LEN(str);
3483 if (slen >
len || (termlen != 1 && slen <
len)) {
3489 if (STR_EMBED_P(str)) {
3490 if (
len == slen)
return str;
3491 if (str_embed_capa(str) >=
len + termlen) {
3492 STR_SET_LEN(str,
len);
3496 str_make_independent_expand(str, slen,
len - slen, termlen);
3498 else if (str_embed_capa(str) >=
len + termlen) {
3500 char *
ptr = STR_HEAP_PTR(str);
3502 if (slen >
len) slen =
len;
3505 STR_SET_LEN(str,
len);
3507 SIZED_FREE_N(
ptr,
capa + termlen);
3511 else if (!independent) {
3512 if (
len == slen)
return str;
3513 str_make_independent_expand(str, slen,
len - slen, termlen);
3517 SIZED_REALLOC_N(
RSTRING(str)->
as.heap.ptr,
char,
3518 (
size_t)
len + termlen, STR_HEAP_SIZE(str));
3521 else if (
len == slen)
return str;
3522 STR_SET_LEN(str,
len);
3529str_ensure_available_capa(
VALUE str,
long len)
3531 str_modify_keep_cr(str);
3533 const int termlen = TERM_LEN(str);
3534 long olen = RSTRING_LEN(str);
3536 if (RB_UNLIKELY(olen > LONG_MAX -
len)) {
3537 rb_raise(rb_eArgError,
"string sizes too big");
3540 long total = olen +
len;
3541 long capa = str_capacity(str, termlen);
3544 if (total >= LONG_MAX / 2) {
3547 while (total >
capa) {
3550 RESIZE_CAPA_TERM(str,
capa, termlen);
3555str_buf_cat4(
VALUE str,
const char *
ptr,
long len,
bool keep_cr)
3558 str_modify_keep_cr(str);
3563 if (
len == 0)
return 0;
3565 long total, olen,
off = -1;
3567 const int termlen = TERM_LEN(str);
3570 if (
ptr >= sptr &&
ptr <= sptr + olen) {
3574 long capa = str_capacity(str, termlen);
3576 if (olen > LONG_MAX -
len) {
3577 rb_raise(rb_eArgError,
"string sizes too big");
3581 if (total >= LONG_MAX / 2) {
3584 while (total >
capa) {
3587 RESIZE_CAPA_TERM(str,
capa, termlen);
3588 sptr = RSTRING_PTR(str);
3593 memcpy(sptr + olen,
ptr,
len);
3594 STR_SET_LEN(str, total);
3595 TERM_FILL(sptr + total, termlen);
/* Forwards to str_buf_cat4() with its keep_cr argument fixed to false. */
3600#define str_buf_cat(str, ptr, len) str_buf_cat4((str), (ptr), len, false)
/* Like str_buf_cat(), but the length is taken from rb_strlen_lit(ptr);
 * presumably ptr must therefore be a string literal -- TODO confirm.
 * Note that ptr appears twice in the expansion. */
3601#define str_buf_cat2(str, ptr) str_buf_cat4((str), (ptr), rb_strlen_lit(ptr), false)
3606 if (
len == 0)
return str;
3608 rb_raise(rb_eArgError,
"negative string size (or size too big)");
3610 return str_buf_cat(str,
ptr,
len);
3621rb_str_buf_cat_byte(
VALUE str,
unsigned char byte)
3626 if (UNLIKELY(!str_independent(str))) {
3627 str_make_independent(str);
3630 long string_length = -1;
3631 const int null_terminator_length = 1;
3636 if (UNLIKELY(string_length > LONG_MAX - 1)) {
3637 rb_raise(rb_eArgError,
"string sizes too big");
3640 long string_capacity = str_capacity(str, null_terminator_length);
3646 if (LIKELY(string_capacity >= string_length + 1)) {
3648 sptr[string_length] = byte;
3649 STR_SET_LEN(str, string_length + 1);
3650 TERM_FILL(sptr + string_length + 1, null_terminator_length);
3654 str_buf_cat(str, (
char *)&
byte, 1);
3670 rb_enc_associate_index(str, ENCINDEX_ASCII_8BIT);
3681rb_enc_cr_str_buf_cat(
VALUE str,
const char *
ptr,
long len,
3682 int ptr_encindex,
int ptr_cr,
int *ptr_cr_ret)
3691 if (str_encindex == ptr_encindex) {
3693 ptr_cr = coderange_scan(
ptr,
len, rb_enc_from_index(ptr_encindex));
3697 str_enc = rb_enc_from_index(str_encindex);
3698 ptr_enc = rb_enc_from_index(ptr_encindex);
3699 if (!rb_enc_asciicompat(str_enc) || !rb_enc_asciicompat(ptr_enc)) {
3702 if (RSTRING_LEN(str) == 0) {
3705 rb_str_change_terminator_length(str, rb_enc_mbminlen(str_enc), rb_enc_mbminlen(ptr_enc));
3711 ptr_cr = coderange_scan(
ptr,
len, ptr_enc);
3715 str_cr = rb_enc_str_coderange(str);
3720 *ptr_cr_ret = ptr_cr;
3722 if (str_encindex != ptr_encindex &&
3725 str_enc = rb_enc_from_index(str_encindex);
3726 ptr_enc = rb_enc_from_index(ptr_encindex);
3731 res_encindex = str_encindex;
3736 res_encindex = str_encindex;
3740 res_encindex = ptr_encindex;
3745 res_encindex = str_encindex;
3752 res_encindex = str_encindex;
3758 rb_raise(rb_eArgError,
"negative string size (or size too big)");
3760 str_buf_cat(str,
ptr,
len);
3766 rb_enc_inspect_name(str_enc), rb_enc_inspect_name(ptr_enc));
3773 return rb_enc_cr_str_buf_cat(str,
ptr,
len,
3783 if (rb_enc_asciicompat(enc)) {
3784 return rb_enc_cr_str_buf_cat(str,
ptr, strlen(
ptr),
3790 unsigned int c = (
unsigned char)*
ptr;
3791 int len = rb_enc_codelen(c, enc);
3792 rb_enc_mbcput(c, buf, enc);
3793 rb_enc_cr_str_buf_cat(str, buf,
len,
3804 int str2_cr = rb_enc_str_coderange(str2);
3806 if (rb_str_enc_fastpath(str)) {
3810 str_buf_cat4(str, RSTRING_PTR(str2), RSTRING_LEN(str2),
true);
3816 str_buf_cat4(str, RSTRING_PTR(str2), RSTRING_LEN(str2),
true);
3827 rb_enc_cr_str_buf_cat(str, RSTRING_PTR(str2), RSTRING_LEN(str2),
3843rb_str_concat_literals(
size_t num,
const VALUE *strary)
3847 unsigned long len = 1;
3852 for (i = 0; i < num; ++i) {
len += RSTRING_LEN(strary[i]); }
3854 str_enc_copy_direct(str, strary[0]);
3856 for (i = s; i < num; ++i) {
3857 const VALUE v = strary[i];
3861 if (encidx != ENCINDEX_US_ASCII) {
3863 rb_enc_set_index(str, encidx);
3876rb_str_concat_multi(
int argc,
VALUE *argv,
VALUE str)
3878 str_modifiable(str);
3883 else if (argc > 1) {
3886 rb_enc_copy(arg_str, str);
3887 for (i = 0; i < argc; i++) {
3922rb_str_append_as_bytes(
int argc,
VALUE *argv,
VALUE str)
3924 long needed_capacity = 0;
3928 for (
int index = 0; index < argc; index++) {
3929 VALUE obj = argv[index];
3937 needed_capacity += RSTRING_LEN(obj);
3942 "wrong argument type %"PRIsVALUE
" (expected String or Integer)",
3949 str_ensure_available_capa(str, needed_capacity);
3952 for (
int index = 0; index < argc; index++) {
3953 VALUE obj = argv[index];
3958 argv[index] = obj = rb_int_and(obj,
INT2FIX(0xff));
3959 char byte = (char)(
NUM2INT(obj) & 0xFF);
3973 rb_bug(
"append_as_bytes arguments should have been validated");
3977 STR_SET_LEN(str, RSTRING_LEN(str) + needed_capacity);
3978 TERM_FILL(sptr, TERM_LEN(str));
3983 for (
int index = 0; index < argc; index++) {
3984 VALUE obj = argv[index];
4001 rb_bug(
"append_as_bytes arguments should have been validated");
4080 if (rb_num_to_uint(str2, &code) == 0) {
4093 encidx = rb_ascii8bit_appendable_encoding_index(enc, code);
4096 rb_str_buf_cat_byte(str1, (
unsigned char)code);
4099 long pos = RSTRING_LEN(str1);
4104 switch (
len = rb_enc_codelen(code, enc)) {
4105 case ONIGERR_INVALID_CODE_POINT_VALUE:
4106 rb_raise(
rb_eRangeError,
"invalid codepoint 0x%X in %s", code, rb_enc_name(enc));
4108 case ONIGERR_TOO_BIG_WIDE_CHAR_VALUE:
4114 rb_enc_mbcput(code, buf, enc);
4115 if (rb_enc_precise_mbclen(buf, buf +
len + 1, enc) !=
len) {
4116 rb_raise(
rb_eRangeError,
"invalid codepoint 0x%X in %s", code, rb_enc_name(enc));
4118 rb_str_resize(str1, pos+
len);
4119 memcpy(RSTRING_PTR(str1) + pos, buf,
len);
4132rb_ascii8bit_appendable_encoding_index(
rb_encoding *enc,
unsigned int code)
4134 int encidx = rb_enc_to_index(enc);
4136 if (encidx == ENCINDEX_ASCII_8BIT || encidx == ENCINDEX_US_ASCII) {
4141 if (encidx == ENCINDEX_US_ASCII && code > 127) {
4142 return ENCINDEX_ASCII_8BIT;
4164rb_str_prepend_multi(
int argc,
VALUE *argv,
VALUE str)
4166 str_modifiable(str);
4171 else if (argc > 1) {
4174 rb_enc_copy(arg_str, str);
4175 for (i = 0; i < argc; i++) {
4188 st_index_t precomputed_hash;
4189 memcpy(&precomputed_hash,
RSTRING_END(str) + TERM_LEN(str),
sizeof(precomputed_hash));
4191 RUBY_ASSERT(precomputed_hash == str_do_hash(str));
4192 return precomputed_hash;
4195 return str_do_hash(str);
4202 const char *ptr1, *ptr2;
4205 return (len1 != len2 ||
4207 memcmp(ptr1, ptr2, len1) != 0);
4219rb_str_hash_m(
VALUE str)
/* Yields b when a > b, otherwise a (i.e. the smaller of the two).
 * Each argument appears twice in the expansion, so do not pass expressions
 * with side effects. */
4225#define lesser(a,b) (((a)>(b))?(b):(a))
4233 if (RSTRING_LEN(str1) == 0)
return TRUE;
4234 if (RSTRING_LEN(str2) == 0)
return TRUE;
4237 if (idx1 == idx2)
return TRUE;
4238 rc1 = rb_enc_str_coderange(str1);
4239 rc2 = rb_enc_str_coderange(str2);
4242 if (rb_enc_asciicompat(rb_enc_from_index(idx2)))
4246 if (rb_enc_asciicompat(rb_enc_from_index(idx1)))
4256 const char *ptr1, *ptr2;
4259 if (str1 == str2)
return 0;
4262 if (ptr1 == ptr2 || (retval = memcmp(ptr1, ptr2, lesser(len1, len2))) == 0) {
4271 if (len1 > len2)
return 1;
4274 if (retval > 0)
return 1;
4308 if (str1 == str2)
return Qtrue;
4315 return rb_str_eql_internal(str1, str2);
4329 if (str1 == str2)
return Qtrue;
4331 return rb_str_eql_internal(str1, str2);
4369 return rb_invcmp(str1, str2);
4411 return str_casecmp(str1, s);
4419 const char *p1, *p1end, *p2, *p2end;
4421 enc = rb_enc_compatible(str1, str2);
4426 p1 = RSTRING_PTR(str1); p1end =
RSTRING_END(str1);
4427 p2 = RSTRING_PTR(str2); p2end =
RSTRING_END(str2);
4428 if (single_byte_optimizable(str1) && single_byte_optimizable(str2)) {
4429 while (p1 < p1end && p2 < p2end) {
4431 unsigned int c1 =
TOLOWER(*p1 & 0xff);
4432 unsigned int c2 =
TOLOWER(*p2 & 0xff);
4434 return INT2FIX(c1 < c2 ? -1 : 1);
4441 while (p1 < p1end && p2 < p2end) {
4442 int l1, c1 = rb_enc_ascget(p1, p1end, &l1, enc);
4443 int l2, c2 = rb_enc_ascget(p2, p2end, &l2, enc);
4445 if (0 <= c1 && 0 <= c2) {
4449 return INT2FIX(c1 < c2 ? -1 : 1);
4453 l1 = rb_enc_mbclen(p1, p1end, enc);
4454 l2 = rb_enc_mbclen(p2, p2end, enc);
4455 len = l1 < l2 ? l1 : l2;
4456 r = memcmp(p1, p2,
len);
4458 return INT2FIX(r < 0 ? -1 : 1);
4460 return INT2FIX(l1 < l2 ? -1 : 1);
4466 if (p1 == p1end && p2 == p2end)
return INT2FIX(0);
4467 if (p1 == p1end)
return INT2FIX(-1);
4500 return str_casecmp_p(str1, s);
4507 VALUE folded_str1, folded_str2;
4508 VALUE fold_opt = sym_fold;
4510 enc = rb_enc_compatible(str1, str2);
4515 folded_str1 = rb_str_downcase(1, &fold_opt, str1);
4516 folded_str2 = rb_str_downcase(1, &fold_opt, str2);
4518 return rb_str_eql(folded_str1, folded_str2);
4522strseq_core(
const char *str_ptr,
const char *str_ptr_end,
long str_len,
4523 const char *sub_ptr,
long sub_len,
long offset,
rb_encoding *enc)
4525 const char *search_start = str_ptr;
4526 long pos, search_len = str_len - offset;
4530 pos =
rb_memsearch(sub_ptr, sub_len, search_start, search_len, enc);
4531 if (pos < 0)
return pos;
4533 if (t == search_start + pos)
break;
4534 search_len -= t - search_start;
4535 if (search_len <= 0)
return -1;
4536 offset += t - search_start;
4539 return pos + offset;
/* Convenience wrappers around rb_strseq_index(): rb_str_index passes
 * in_byte = 0, rb_str_byteindex passes in_byte = 1. */
4543#define rb_str_index(str, sub, offset) rb_strseq_index(str, sub, offset, 0)
4544#define rb_str_byteindex(str, sub, offset) rb_strseq_index(str, sub, offset, 1)
4547rb_strseq_index(
VALUE str,
VALUE sub,
long offset,
int in_byte)
4549 const char *str_ptr, *str_ptr_end, *sub_ptr;
4550 long str_len, sub_len;
4553 enc = rb_enc_check(str, sub);
4554 if (is_broken_string(sub))
return -1;
4556 str_ptr = RSTRING_PTR(str);
4558 str_len = RSTRING_LEN(str);
4559 sub_ptr = RSTRING_PTR(sub);
4560 sub_len = RSTRING_LEN(sub);
4562 if (str_len < sub_len)
return -1;
4565 long str_len_char, sub_len_char;
4566 int single_byte = single_byte_optimizable(str);
4567 str_len_char = (in_byte || single_byte) ? str_len : str_strlen(str, enc);
4568 sub_len_char = in_byte ? sub_len : str_strlen(sub, enc);
4570 offset += str_len_char;
4571 if (offset < 0)
return -1;
4573 if (str_len_char - offset < sub_len_char)
return -1;
4574 if (!in_byte) offset = str_offset(str_ptr, str_ptr_end, offset, enc, single_byte);
4577 if (sub_len == 0)
return offset;
4580 return strseq_core(str_ptr, str_ptr_end, str_len, sub_ptr, sub_len, offset, enc);
4593rb_str_index_m(
int argc,
VALUE *argv,
VALUE str)
4600 if (
rb_scan_args(argc, argv,
"11", &sub, &initpos) == 2) {
4601 long slen = str_strlen(str, enc);
4603 if (pos < 0 ? (pos += slen) < 0 : pos > slen) {
4615 pos = str_offset(RSTRING_PTR(str),
RSTRING_END(str), pos,
4616 enc, single_byte_optimizable(str));
4626 pos = rb_str_index(str, sub, pos);
4640str_ensure_byte_pos(
VALUE str,
long pos)
4642 if (!single_byte_optimizable(str)) {
4643 const char *s = RSTRING_PTR(str);
4645 const char *p = s + pos;
4646 if (!at_char_boundary(s, p, e, rb_enc_get(str))) {
4648 "offset %ld does not land on character boundary", pos);
4721rb_str_byteindex_m(
int argc,
VALUE *argv,
VALUE str)
4727 if (
rb_scan_args(argc, argv,
"11", &sub, &initpos) == 2) {
4728 long slen = RSTRING_LEN(str);
4730 if (pos < 0 ? (pos += slen) < 0 : pos > slen) {
4741 str_ensure_byte_pos(str, pos);
4746 pos = RMATCH_BEG(match, 0);
4752 pos = rb_str_byteindex(str, sub, pos);
4753 if (pos >= 0)
return LONG2NUM(pos);
4760memrchr(
const char *search_str,
int chr,
long search_len)
4762 const char *
ptr = search_str + search_len;
4763 while (
ptr > search_str) {
4764 if ((
unsigned char)*(--
ptr) == chr)
return (
void *)
ptr;
4774 char *hit, *adjusted;
4776 long slen, searchlen;
4779 sbeg = RSTRING_PTR(str);
4780 slen = RSTRING_LEN(sub);
4781 if (slen == 0)
return s - sbeg;
4783 t = RSTRING_PTR(sub);
4785 searchlen = s - sbeg + 1;
4787 if (memcmp(s, t, slen) == 0) {
4792 hit = memrchr(sbeg, c, searchlen);
4795 if (hit != adjusted) {
4796 searchlen = adjusted - sbeg;
4799 if (memcmp(hit, t, slen) == 0)
4801 searchlen = adjusted - sbeg;
4802 }
while (searchlen > 0);
4816 enc = rb_enc_check(str, sub);
4817 if (is_broken_string(sub))
return -1;
4818 singlebyte = single_byte_optimizable(str);
4819 len = singlebyte ? RSTRING_LEN(str) : str_strlen(str, enc);
4820 slen = str_strlen(sub, enc);
4823 if (
len < slen)
return -1;
4824 if (
len - pos < slen) pos =
len - slen;
4825 if (
len == 0)
return pos;
4827 sbeg = RSTRING_PTR(str);
4830 if (memcmp(sbeg, RSTRING_PTR(sub), RSTRING_LEN(sub)) == 0)
4836 s = str_nth(sbeg,
RSTRING_END(str), pos, enc, singlebyte);
4837 return str_rindex(str, sub, s, enc);
4849rb_str_rindex_m(
int argc,
VALUE *argv,
VALUE str)
4854 long pos,
len = str_strlen(str, enc);
4856 if (
rb_scan_args(argc, argv,
"11", &sub, &initpos) == 2) {
4858 if (pos < 0 && (pos +=
len) < 0) {
4864 if (pos >
len) pos =
len;
4872 pos = str_offset(RSTRING_PTR(str),
RSTRING_END(str), pos,
4873 enc, single_byte_optimizable(str));
4883 pos = rb_str_rindex(str, sub, pos);
4893rb_str_byterindex(
VALUE str,
VALUE sub,
long pos)
4899 enc = rb_enc_check(str, sub);
4900 if (is_broken_string(sub))
return -1;
4901 len = RSTRING_LEN(str);
4902 slen = RSTRING_LEN(sub);
4905 if (
len < slen)
return -1;
4906 if (
len - pos < slen) pos =
len - slen;
4907 if (
len == 0)
return pos;
4909 sbeg = RSTRING_PTR(str);
4912 if (memcmp(sbeg, RSTRING_PTR(sub), RSTRING_LEN(sub)) == 0)
4919 return str_rindex(str, sub, s, enc);
5009rb_str_byterindex_m(
int argc,
VALUE *argv,
VALUE str)
5013 long pos,
len = RSTRING_LEN(str);
5015 if (
rb_scan_args(argc, argv,
"11", &sub, &initpos) == 2) {
5017 if (pos < 0 && (pos +=
len) < 0) {
5023 if (pos >
len) pos =
len;
5029 str_ensure_byte_pos(str, pos);
5034 pos = RMATCH_BEG(match, 0);
5040 pos = rb_str_byterindex(str, sub, pos);
5041 if (pos >= 0)
return LONG2NUM(pos);
5083 switch (OBJ_BUILTIN_TYPE(y)) {
5137rb_str_match_m(
int argc,
VALUE *argv,
VALUE str)
5144 result = rb_funcallv(get_pat(re), rb_intern(
"match"), argc, argv);
5175rb_str_match_m_p(
int argc,
VALUE *argv,
VALUE str)
5179 re = get_pat(argv[0]);
5180 return rb_reg_match_p(re, str, argc > 1 ?
NUM2LONG(argv[1]) : 0);
5189static enum neighbor_char
5195 if (rb_enc_mbminlen(enc) > 1) {
5197 int r = rb_enc_precise_mbclen(p, p +
len, enc), c;
5199 return NEIGHBOR_NOT_CHAR;
5201 c = rb_enc_mbc_to_codepoint(p, p +
len, enc) + 1;
5203 if (!l)
return NEIGHBOR_NOT_CHAR;
5204 if (l !=
len)
return NEIGHBOR_WRAPPED;
5205 rb_enc_mbcput(c, p, enc);
5206 r = rb_enc_precise_mbclen(p, p +
len, enc);
5208 return NEIGHBOR_NOT_CHAR;
5210 return NEIGHBOR_FOUND;
5213 for (i =
len-1; 0 <= i && (
unsigned char)p[i] == 0xff; i--)
5216 return NEIGHBOR_WRAPPED;
5217 ++((
unsigned char*)p)[i];
5218 l = rb_enc_precise_mbclen(p, p+
len, enc);
5222 return NEIGHBOR_FOUND;
5225 memset(p+l, 0xff,
len-l);
5231 for (len2 =
len-1; 0 < len2; len2--) {
5232 l2 = rb_enc_precise_mbclen(p, p+len2, enc);
5236 memset(p+len2+1, 0xff,
len-(len2+1));
5241static enum neighbor_char
5246 if (rb_enc_mbminlen(enc) > 1) {
5248 int r = rb_enc_precise_mbclen(p, p +
len, enc), c;
5250 return NEIGHBOR_NOT_CHAR;
5252 c = rb_enc_mbc_to_codepoint(p, p +
len, enc);
5253 if (!c)
return NEIGHBOR_NOT_CHAR;
5256 if (!l)
return NEIGHBOR_NOT_CHAR;
5257 if (l !=
len)
return NEIGHBOR_WRAPPED;
5258 rb_enc_mbcput(c, p, enc);
5259 r = rb_enc_precise_mbclen(p, p +
len, enc);
5261 return NEIGHBOR_NOT_CHAR;
5263 return NEIGHBOR_FOUND;
5266 for (i =
len-1; 0 <= i && (
unsigned char)p[i] == 0; i--)
5269 return NEIGHBOR_WRAPPED;
5270 --((
unsigned char*)p)[i];
5271 l = rb_enc_precise_mbclen(p, p+
len, enc);
5275 return NEIGHBOR_FOUND;
5278 memset(p+l, 0,
len-l);
5284 for (len2 =
len-1; 0 < len2; len2--) {
5285 l2 = rb_enc_precise_mbclen(p, p+len2, enc);
5289 memset(p+len2+1, 0,
len-(len2+1));
5303static enum neighbor_char
5304enc_succ_alnum_char(
char *p,
long len,
rb_encoding *enc,
char *carry)
5306 enum neighbor_char ret;
5310 char save[ONIGENC_CODE_TO_MBC_MAXLEN];
5314 const int max_gaps = 1;
5316 c = rb_enc_mbc_to_codepoint(p, p+
len, enc);
5318 ctype = ONIGENC_CTYPE_DIGIT;
5320 ctype = ONIGENC_CTYPE_ALPHA;
5322 return NEIGHBOR_NOT_CHAR;
5325 for (
try = 0;
try <= max_gaps; ++
try) {
5326 ret = enc_succ_char(p,
len, enc);
5327 if (ret == NEIGHBOR_FOUND) {
5328 c = rb_enc_mbc_to_codepoint(p, p+
len, enc);
5330 return NEIGHBOR_FOUND;
5337 ret = enc_pred_char(p,
len, enc);
5338 if (ret == NEIGHBOR_FOUND) {
5339 c = rb_enc_mbc_to_codepoint(p, p+
len, enc);
5352 return NEIGHBOR_NOT_CHAR;
5355 if (ctype != ONIGENC_CTYPE_DIGIT) {
5357 return NEIGHBOR_WRAPPED;
5361 enc_succ_char(carry,
len, enc);
5362 return NEIGHBOR_WRAPPED;
5380 str =
rb_str_new(RSTRING_PTR(orig), RSTRING_LEN(orig));
5381 rb_enc_cr_str_copy_for_substr(str, orig);
5382 return str_succ(str);
5389 char *sbeg, *s, *e, *last_alnum = 0;
5390 int found_alnum = 0;
5392 char carry[ONIGENC_CODE_TO_MBC_MAXLEN] =
"\1";
5393 long carry_pos = 0, carry_len = 1;
5394 enum neighbor_char neighbor = NEIGHBOR_FOUND;
5396 slen = RSTRING_LEN(str);
5397 if (slen == 0)
return str;
5399 enc = STR_ENC_GET(str);
5400 sbeg = RSTRING_PTR(str);
5401 s = e = sbeg + slen;
5403 while ((s = rb_enc_prev_char(sbeg, s, e, enc)) != 0) {
5404 if (neighbor == NEIGHBOR_NOT_CHAR && last_alnum) {
5410 l = rb_enc_precise_mbclen(s, e, enc);
5411 if (!ONIGENC_MBCLEN_CHARFOUND_P(l))
continue;
5412 l = ONIGENC_MBCLEN_CHARFOUND_LEN(l);
5413 neighbor = enc_succ_alnum_char(s, l, enc, carry);
5415 case NEIGHBOR_NOT_CHAR:
5417 case NEIGHBOR_FOUND:
5419 case NEIGHBOR_WRAPPED:
5424 carry_pos = s - sbeg;
5429 while ((s = rb_enc_prev_char(sbeg, s, e, enc)) != 0) {
5430 enum neighbor_char neighbor;
5431 char tmp[ONIGENC_CODE_TO_MBC_MAXLEN];
5432 l = rb_enc_precise_mbclen(s, e, enc);
5433 if (!ONIGENC_MBCLEN_CHARFOUND_P(l))
continue;
5434 l = ONIGENC_MBCLEN_CHARFOUND_LEN(l);
5436 neighbor = enc_succ_char(tmp, l, enc);
5438 case NEIGHBOR_FOUND:
5442 case NEIGHBOR_WRAPPED:
5445 case NEIGHBOR_NOT_CHAR:
5448 if (rb_enc_precise_mbclen(s, s+l, enc) != l) {
5450 enc_succ_char(s, l, enc);
5452 if (!rb_enc_asciicompat(enc)) {
5453 MEMCPY(carry, s,
char, l);
5456 carry_pos = s - sbeg;
5460 RESIZE_CAPA(str, slen + carry_len);
5461 sbeg = RSTRING_PTR(str);
5462 s = sbeg + carry_pos;
5463 memmove(s + carry_len, s, slen - carry_pos);
5464 memmove(s, carry, carry_len);
5466 STR_SET_LEN(str, slen);
5467 TERM_FILL(&sbeg[slen], rb_enc_mbminlen(enc));
5468 rb_enc_str_coderange(str);
5483rb_str_succ_bang(
VALUE str)
5491all_digits_p(
const char *s,
long len)
5519 VALUE end, exclusive;
5523 return rb_str_upto_each(beg, end,
RTEST(exclusive), str_upto_i,
Qnil);
5529 VALUE current, after_end;
5536 enc = rb_enc_check(beg, end);
5537 ascii = (is_ascii_string(beg) && is_ascii_string(end));
5539 if (RSTRING_LEN(beg) == 1 && RSTRING_LEN(end) == 1 && ascii) {
5540 char c = RSTRING_PTR(beg)[0];
5541 char e = RSTRING_PTR(end)[0];
5543 if (c > e || (excl && c == e))
return beg;
5545 VALUE str = rb_enc_str_new(&c, 1, enc);
5547 if ((*each)(str, arg))
break;
5548 if (!excl && c == e)
break;
5550 if (excl && c == e)
break;
5555 if (ascii &&
ISDIGIT(RSTRING_PTR(beg)[0]) &&
ISDIGIT(RSTRING_PTR(end)[0]) &&
5556 all_digits_p(RSTRING_PTR(beg), RSTRING_LEN(beg)) &&
5557 all_digits_p(RSTRING_PTR(end), RSTRING_LEN(end))) {
5562 b = rb_str_to_inum(beg, 10, FALSE);
5563 e = rb_str_to_inum(end, 10, FALSE);
5570 if (excl && bi == ei)
break;
5571 if ((*each)(
rb_enc_sprintf(usascii,
"%.*ld", width, bi), arg))
break;
5576 ID op = excl ?
'<' : idLE;
5577 VALUE args[2], fmt = rb_fstring_lit(
"%.*d");
5582 if ((*each)(
rb_str_format(numberof(args), args, fmt), arg))
break;
5583 b = rb_funcallv(b, succ, 0, 0);
5590 if (n > 0 || (excl && n == 0))
return beg;
5592 after_end = rb_funcallv(end, succ, 0, 0);
5597 next = rb_funcallv(current, succ, 0, 0);
5598 if ((*each)(current, arg))
break;
5599 if (
NIL_P(next))
break;
5603 if (RSTRING_LEN(current) > RSTRING_LEN(end) || RSTRING_LEN(current) == 0)
5618 if (is_ascii_string(beg) &&
ISDIGIT(RSTRING_PTR(beg)[0]) &&
5619 all_digits_p(RSTRING_PTR(beg), RSTRING_LEN(beg))) {
5620 VALUE b, args[2], fmt = rb_fstring_lit(
"%.*d");
5622 b = rb_str_to_inum(beg, 10, FALSE);
5628 if ((*each)(
rb_enc_sprintf(usascii,
"%.*ld", width, bi), arg))
break;
5636 if ((*each)(
rb_str_format(numberof(args), args, fmt), arg))
break;
5637 b = rb_funcallv(b, succ, 0, 0);
5643 VALUE next = rb_funcallv(current, succ, 0, 0);
5644 if ((*each)(current, arg))
break;
5647 if (RSTRING_LEN(current) == 0)
5658 if (!
rb_equal(str, *argp))
return 0;
5672 if (rb_enc_asciicompat(STR_ENC_GET(beg)) &&
5673 rb_enc_asciicompat(STR_ENC_GET(end)) &&
5674 rb_enc_asciicompat(STR_ENC_GET(val))) {
5675 const char *bp = RSTRING_PTR(beg);
5676 const char *ep = RSTRING_PTR(end);
5677 const char *vp = RSTRING_PTR(val);
5678 if (RSTRING_LEN(beg) == 1 && RSTRING_LEN(end) == 1) {
5679 if (RSTRING_LEN(val) == 0 || RSTRING_LEN(val) > 1)
5687 if (b <= v && v < e)
return Qtrue;
5688 return RBOOL(!
RTEST(exclusive) && v == e);
5695 all_digits_p(bp, RSTRING_LEN(beg)) &&
5696 all_digits_p(ep, RSTRING_LEN(end))) {
5701 rb_str_upto_each(beg, end,
RTEST(exclusive), include_range_i, (
VALUE)&val);
5703 return RBOOL(
NIL_P(val));
5726 return rb_str_subpat(str, indx,
INT2FIX(0));
5729 if (rb_str_index(str, indx, 0) != -1)
5735 long beg,
len = str_strlen(str, NULL);
5747 return str_substr(str, idx, 1, FALSE);
5764rb_str_aref_m(
int argc,
VALUE *argv,
VALUE str)
5768 return rb_str_subpat(str, argv[0], argv[1]);
5771 return rb_str_substr_two_fixnums(str, argv[0], argv[1], TRUE);
5775 return rb_str_aref(str, argv[0]);
5781 char *
ptr = RSTRING_PTR(str);
5782 long olen = RSTRING_LEN(str), nlen;
5784 str_modifiable(str);
5785 if (
len > olen)
len = olen;
5787 if (str_embed_capa(str) >= nlen + TERM_LEN(str)) {
5789 size_t old_capa =
RSTRING(str)->as.heap.aux.capa + TERM_LEN(str);
5790 int fl = (int)(
RBASIC(str)->flags & (STR_NOEMBED|STR_SHARED|STR_NOFREE));
5793 memmove(
ptr, oldptr +
len, nlen);
5794 if (fl == STR_NOEMBED) {
5795 SIZED_FREE_N(oldptr, old_capa);
5799 if (!STR_SHARED_P(str)) {
5801 rb_enc_cr_str_exact_copy(
shared, str);
5806 STR_SET_LEN(str, nlen);
5808 if (!SHARABLE_MIDDLE_SUBSTRING) {
5809 TERM_FILL(
ptr + nlen, TERM_LEN(str));
5816rb_str_update_1(
VALUE str,
long beg,
long len,
VALUE val,
long vbeg,
long vlen)
5822 if (beg == 0 && vlen == 0) {
5827 str_modify_keep_cr(str);
5831 RESIZE_CAPA(str, slen + vlen -
len);
5832 sptr = RSTRING_PTR(str);
5836 cr = rb_enc_str_coderange(val);
5841 memmove(sptr + beg + vlen,
5843 slen - (beg +
len));
5845 if (vlen < beg &&
len < 0) {
5849 memmove(sptr + beg, RSTRING_PTR(val) + vbeg, vlen);
5852 STR_SET_LEN(str, slen);
5853 TERM_FILL(&sptr[slen], TERM_LEN(str));
5860 rb_str_update_1(str, beg,
len, val, 0, RSTRING_LEN(val));
5869 int singlebyte = single_byte_optimizable(str);
5875 enc = rb_enc_check(str, val);
5876 slen = str_strlen(str, enc);
5878 if ((slen < beg) || ((beg < 0) && (beg + slen < 0))) {
5887 if (
len > slen - beg) {
5890 p = str_nth(RSTRING_PTR(str),
RSTRING_END(str), beg, enc, singlebyte);
5895 beg = p - RSTRING_PTR(str);
5897 rb_str_update_0(str, beg,
len, val);
5898 rb_enc_associate(str, enc);
5909 long start, end,
len;
5917 int num_regs = RMATCH_NREGS(match);
5918 if ((nth >= num_regs) || ((nth < 0) && (-nth >= num_regs))) {
5925 start = RMATCH_BEG(match, nth);
5929 end = RMATCH_END(match, nth);
5932 enc = rb_enc_check_str(str, val);
5933 rb_str_update_0(str, start,
len, val);
5934 rb_enc_associate(str, enc);
5942 switch (
TYPE(indx)) {
5944 rb_str_subpat_set(str, indx,
INT2FIX(0), val);
5948 beg = rb_str_index(str, indx, 0);
5987rb_str_aset_m(
int argc,
VALUE *argv,
VALUE str)
5991 rb_str_subpat_set(str, argv[0], argv[1], argv[2]);
5999 return rb_str_aset(str, argv[0], argv[1]);
6051rb_str_slice_bang(
int argc,
VALUE *argv,
VALUE str)
6059 str_modify_keep_cr(str);
6064 int num_regs = RMATCH_NREGS(match);
6067 if ((nth += num_regs) <= 0)
return Qnil;
6069 else if (nth >= num_regs)
return Qnil;
6070 beg = RMATCH_BEG(match, nth);
6071 len = RMATCH_END(match, nth) - beg;
6074 else if (argc == 2) {
6083 beg = p - RSTRING_PTR(str);
6087 beg = rb_str_index(str, indx, 0);
6088 if (beg == -1)
return Qnil;
6089 len = RSTRING_LEN(indx);
6101 beg = p - RSTRING_PTR(str);
6110 beg = p - RSTRING_PTR(str);
6114 rb_enc_cr_str_copy_for_substr(result, str);
6122 char *sptr = RSTRING_PTR(str);
6123 long slen = RSTRING_LEN(str);
6124 if (beg +
len > slen)
6128 slen - (beg +
len));
6130 STR_SET_LEN(str, slen);
6131 TERM_FILL(&sptr[slen], TERM_LEN(str));
6142 switch (OBJ_BUILTIN_TYPE(pat)) {
6161get_pat_quoted(
VALUE pat,
int check)
6165 switch (OBJ_BUILTIN_TYPE(pat)) {
6179 if (check && is_broken_string(pat)) {
/*
 * Searches +str+ for +pat+ starting at +pos+ and returns the position found.
 *
 * Two search paths are visible here:
 *  - a plain substring search through rb_str_byteindex(); when
 *    set_backref_str is nonzero, a frozen copy of +str+ is handed to
 *    rb_backref_set_string() together with the hit position and the byte
 *    length of +pat+, and the resulting match data is stored through +match+;
 *  - a regexp search delegated to rb_reg_search0().
 *
 * NOTE(review): the test that chooses between the two paths and the guards
 * around the back-reference update are not visible in this excerpt;
 * presumably the first path is taken when +pat+ is a String and +match+ is
 * only written when it is non-NULL (rb_pat_search passes NULL) -- confirm.
 */
6186rb_pat_search0(
VALUE pat,
VALUE str,
long pos,
int set_backref_str,
VALUE *match)
6189 pos = rb_str_byteindex(str, pat, pos);
6190 if (set_backref_str) {
/* the back-reference is built on a frozen copy of str, not on str itself */
6192 str = rb_str_new_frozen_String(str);
6193 VALUE match_data = rb_backref_set_string(str, pos, RSTRING_LEN(pat));
6195 *match = match_data;
6205 return rb_reg_search0(pat, str, pos, 0, set_backref_str, match);
/*
 * Convenience form of rb_pat_search0() for callers that do not need the
 * match data: forwards every argument unchanged and passes NULL for the
 * match out-parameter.
 */
6210rb_pat_search(
VALUE pat,
VALUE str,
long pos,
int set_backref_str)
6212 return rb_pat_search0(pat, str, pos, set_backref_str, NULL);
6230rb_str_sub_bang(
int argc,
VALUE *argv,
VALUE str)
6245 hash = rb_check_hash_type(repl);
6252 pat = get_pat_quoted(argv[0], 1);
6254 str_modifiable(str);
6255 beg = rb_pat_search(pat, str, 0, 1);
6267 end0 = beg0 + RSTRING_LEN(pat);
6271 beg0 = RMATCH_BEG(match, 0);
6272 end0 = RMATCH_END(match, 0);
6276 if (iter || !
NIL_P(hash)) {
6277 p = RSTRING_PTR(str);
len = RSTRING_LEN(str);
6283 repl = rb_hash_aref(hash,
rb_str_subseq(str, beg0, end0 - beg0));
6286 str_mod_check(str, p,
len);
6287 rb_check_frozen(str);
6290 repl = rb_reg_regsub_match(repl, str, match);
6293 enc = rb_enc_compatible(str, repl);
6296 p = RSTRING_PTR(str);
len = RSTRING_LEN(str);
6300 rb_enc_inspect_name(str_enc),
6301 rb_enc_inspect_name(STR_ENC_GET(repl)));
6303 enc = STR_ENC_GET(repl);
6306 rb_enc_associate(str, enc);
6316 rlen = RSTRING_LEN(repl);
6317 len = RSTRING_LEN(str);
6319 RESIZE_CAPA(str,
len + rlen - plen);
6321 p = RSTRING_PTR(str);
6323 memmove(p + beg0 + rlen, p + beg0 + plen,
len - beg0 - plen);
6325 rp = RSTRING_PTR(repl);
6326 memmove(p + beg0, rp, rlen);
6328 STR_SET_LEN(str,
len);
6329 TERM_FILL(&RSTRING_PTR(str)[
len], TERM_LEN(str));
6352 rb_str_sub_bang(argc, argv, str);
6357str_gsub(
int argc,
VALUE *argv,
VALUE str,
int bang)
6360 long beg, beg0, end0;
6361 long offset, blen, slen,
len, last;
6362 enum {STR, ITER, FAST_MAP, MAP} mode = STR;
6364 int need_backref_str = -1;
6375 hash = rb_check_hash_type(repl);
6379 else if (rb_hash_default_unredefined(hash) && !
FL_TEST_RAW(hash, RHASH_PROC_DEFAULT)) {
6388 rb_error_arity(argc, 1, 2);
6391 pat = get_pat_quoted(argv[0], 1);
6392 beg = rb_pat_search0(pat, str, 0, need_backref_str, &match);
6395 if (bang)
return Qnil;
6398 if (bang) str_modify_keep_cr(str);
6401 blen = RSTRING_LEN(str) + 30;
6403 sp = RSTRING_PTR(str);
6404 slen = RSTRING_LEN(str);
6406 str_enc = STR_ENC_GET(str);
6407 rb_enc_associate(dest, str_enc);
6413 end0 = beg0 + RSTRING_LEN(pat);
6417 beg0 = RMATCH_BEG(match, 0);
6418 end0 = RMATCH_END(match, 0);
6427 struct RString fake_str = {RBASIC_INIT};
6429 if (mode == FAST_MAP) {
6438 val = rb_hash_aref(hash, key);
6441 str_mod_check(str, sp, slen);
6446 else if (need_backref_str) {
6447 val = rb_reg_regsub_match(repl, str, match);
6448 if (need_backref_str < 0) {
6449 need_backref_str = val != repl;
6456 len = beg0 - offset;
6470 if (RSTRING_LEN(str) <= end0)
break;
6471 len = rb_enc_fast_mbclen(RSTRING_PTR(str)+end0,
RSTRING_END(str), str_enc);
6473 offset = end0 +
len;
6475 cp = RSTRING_PTR(str) + offset;
6476 if (offset > RSTRING_LEN(str))
break;
6479 if (mode != FAST_MAP && mode != STR) {
6482 beg = rb_pat_search0(pat, str, offset, need_backref_str, &match);
6487 if (RSTRING_LEN(str) > offset) {
6490 rb_pat_search0(pat, str, last, 1, &match);
6492 str_shared_replace(str, dest);
/*
 * In-place global substitution entry point.
 *
 * Runs str_modifiable() on the receiver first, then hands argc/argv/str to
 * str_gsub() with its last argument (bang) set to 1 and returns whatever
 * str_gsub() returns.
 */
6517rb_str_gsub_bang(
int argc,
VALUE *argv,
VALUE str)
6519 str_modifiable(str);
6520 return str_gsub(argc, argv, str, 1);
6570 return str_gsub(argc, argv, str, 0);
6590 str_modifiable(str);
6591 if (str == str2)
return str;
6595 return str_replace(str, str2);
6612rb_str_clear(
VALUE str)
6616 STR_SET_LEN(str, 0);
6617 RSTRING_PTR(str)[0] = 0;
6618 if (rb_enc_asciicompat(STR_ENC_GET(str)))
6634rb_str_chr(
VALUE str)
6652 pos += RSTRING_LEN(str);
6653 if (pos < 0 || RSTRING_LEN(str) <= pos)
6656 return INT2FIX((
unsigned char)RSTRING_PTR(str)[pos]);
6676 long len = RSTRING_LEN(str);
6677 char *
ptr, *head, *left = 0;
6681 if (pos < -
len ||
len <= pos)
6688 char byte = (char)(
NUM2INT(w) & 0xFF);
6690 if (!str_independent(str))
6691 str_make_independent(str);
6692 enc = STR_ENC_GET(str);
6693 head = RSTRING_PTR(str);
6695 if (!STR_EMBED_P(str)) {
6702 nlen = rb_enc_precise_mbclen(left, head+
len, enc);
6710 width = rb_enc_precise_mbclen(left, head+
len, enc);
6712 nlen = rb_enc_precise_mbclen(left, head+
len, enc);
6728str_byte_substr(
VALUE str,
long beg,
long len,
int empty)
6730 long n = RSTRING_LEN(str);
6732 if (beg > n ||
len < 0)
return Qnil;
6735 if (beg < 0)
return Qnil;
6740 if (!empty)
return Qnil;
6744 VALUE str2 = str_subseq(str, beg,
len);
6746 str_enc_copy_direct(str2, str);
6748 if (RSTRING_LEN(str2) == 0) {
6749 if (!rb_enc_asciicompat(STR_ENC_GET(str)))
6783 long beg,
len = RSTRING_LEN(str);
6791 return str_byte_substr(str, beg,
len, TRUE);
6796 return str_byte_substr(str, idx, 1, FALSE);
6808rb_str_byteslice(
int argc,
VALUE *argv,
VALUE str)
6813 return str_byte_substr(str, beg,
len, TRUE);
6816 return str_byte_aref(str, argv[0]);
6820str_check_beg_len(
VALUE str,
long *beg,
long *
len)
6822 long end, slen = RSTRING_LEN(str);
6825 if ((slen < *beg) || ((*beg < 0) && (*beg + slen < 0))) {
6834 if (*
len > slen - *beg) {
6838 str_ensure_byte_pos(str, *beg);
6839 str_ensure_byte_pos(str, end);
6853rb_str_bytesplice(
int argc,
VALUE *argv,
VALUE str)
6855 long beg,
len, vbeg, vlen;
6860 if (!(argc == 2 || argc == 3 || argc == 5)) {
6861 rb_raise(rb_eArgError,
"wrong number of arguments (given %d, expected 2, 3, or 5)", argc);
6865 rb_raise(
rb_eTypeError,
"wrong argument type %s (expected Range)",
6866 rb_builtin_class_name(argv[0]));
6873 vlen = RSTRING_LEN(val);
6878 rb_raise(
rb_eTypeError,
"wrong argument type %s (expected Range)",
6879 rb_builtin_class_name(argv[2]));
6891 vlen = RSTRING_LEN(val);
6899 str_check_beg_len(str, &beg, &
len);
6900 str_check_beg_len(val, &vbeg, &vlen);
6901 str_modify_keep_cr(str);
6904 rb_enc_associate(str, rb_enc_check(str, val));
6907 rb_str_update_1(str, beg,
len, val, vbeg, vlen);
6929rb_str_reverse(
VALUE str)
6936 if (RSTRING_LEN(str) <= 1)
return str_duplicate(
rb_cString, str);
6937 enc = STR_ENC_GET(str);
6943 if (RSTRING_LEN(str) > 1) {
6944 if (single_byte_optimizable(str)) {
6951 int clen = rb_enc_fast_mbclen(s, e, enc);
6959 cr = rb_enc_asciicompat(enc) ?
6962 int clen = rb_enc_mbclen(s, e, enc);
6971 STR_SET_LEN(rev, RSTRING_LEN(str));
6972 str_enc_copy_direct(rev, str);
6994rb_str_reverse_bang(
VALUE str)
6996 if (RSTRING_LEN(str) > 1) {
6997 if (single_byte_optimizable(str)) {
7000 str_modify_keep_cr(str);
7001 s = RSTRING_PTR(str);
7010 str_shared_replace(str, rb_str_reverse(str));
7014 str_modify_keep_cr(str);
7043 i = rb_str_index(str, arg, 0);
7045 return RBOOL(i != -1);
7089 rb_raise(rb_eArgError,
"invalid radix %d", base);
7091 return rb_str_to_inum(str, base, FALSE);
7116rb_str_to_f(
VALUE str)
7133rb_str_to_s(
VALUE str)
7145 char s[RUBY_MAX_CHAR_LEN];
7146 int n = rb_enc_codelen(c, enc);
7148 rb_enc_mbcput(c, s, enc);
/*
 * Size bound handed to snprintf() when one escaped character is formatted;
 * the receiving buffers in this file are declared as CHAR_ESC_LEN + 1 bytes.
 * NOTE(review): 13 presumably covers the longest \x{...} rendering of a
 * 32-bit value plus the terminating NUL -- confirm.
 */
7153#define CHAR_ESC_LEN 13
/*
 * Formats the code point +c+ as text into a local buffer using one of five
 * spellings visible below: the character itself (%c), \uXXXX (four hex
 * digits, used when c < 0x10000), \u{X...}, \xXX (two hex digits) or
 * \x{X...}.  The length of the produced text is then taken with strlen().
 *
 * NOTE(review): the tests that select between the \u and \x families are
 * not all visible in this excerpt; presumably +unicode_p+ chooses the
 * family and +result+ receives the formatted text -- confirm.
 */
7156rb_str_buf_cat_escaped_char(
VALUE result,
unsigned int c,
int unicode_p)
7158 char buf[CHAR_ESC_LEN + 1];
7166 snprintf(buf, CHAR_ESC_LEN,
"%c", c);
7168 else if (c < 0x10000) {
7169 snprintf(buf, CHAR_ESC_LEN,
"\\u%04X", c);
7172 snprintf(buf, CHAR_ESC_LEN,
"\\u{%X}", c);
7177 snprintf(buf, CHAR_ESC_LEN,
"\\x%02X", c);
7180 snprintf(buf, CHAR_ESC_LEN,
"\\x{%X}", c);
7183 l = (int)strlen(buf);
/*
 * Maps a control character to the text of its backslash escape.
 *
 * Visible mappings: NUL, newline, carriage return, tab, form feed,
 * vertical tab (013), backspace (010), bell (007), escape (033) and
 * DEL (0x7f, spelled with the control-? form).  Each returned string starts
 * with a literal backslash.
 *
 * NOTE(review): the result for any other value of +c+ is not visible in
 * this excerpt -- confirm before relying on it.
 */
7189ruby_escaped_char(
int c)
7192 case '\0':
return "\\0";
7193 case '\n':
return "\\n";
7194 case '\r':
return "\\r";
7195 case '\t':
return "\\t";
7196 case '\f':
return "\\f";
7197 case '\013':
return "\\v";
7198 case '\010':
return "\\b";
7199 case '\007':
return "\\a";
7200 case '\033':
return "\\e";
7201 case '\x7f':
return "\\c?";
7207rb_str_escape(
VALUE str)
7211 const char *p = RSTRING_PTR(str);
7213 const char *prev = p;
7214 char buf[CHAR_ESC_LEN + 1];
7216 int unicode_p = rb_enc_unicode_p(enc);
7217 int asciicompat = rb_enc_asciicompat(enc);
7222 int n = rb_enc_precise_mbclen(p, pend, enc);
7224 if (p > prev) str_buf_cat(result, prev, p - prev);
7225 n = rb_enc_mbminlen(enc);
7227 n = (int)(pend - p);
7229 snprintf(buf, CHAR_ESC_LEN,
"\\x%02X", *p & 0377);
7230 str_buf_cat(result, buf, strlen(buf));
7236 c = rb_enc_mbc_to_codepoint(p, pend, enc);
7238 cc = ruby_escaped_char(c);
7240 if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
7241 str_buf_cat(result, cc, strlen(cc));
7244 else if (asciicompat && rb_enc_isascii(c, enc) &&
ISPRINT(c)) {
7247 if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
7248 rb_str_buf_cat_escaped_char(result, c, unicode_p);
7252 if (p > prev) str_buf_cat(result, prev, p - prev);
7271 const char *p, *pend, *prev;
7272 char buf[CHAR_ESC_LEN + 1];
7274 rb_encoding *resenc = rb_default_internal_encoding();
7275 int unicode_p = rb_enc_unicode_p(enc);
7276 int asciicompat = rb_enc_asciicompat(enc);
7278 if (resenc == NULL) resenc = rb_default_external_encoding();
7279 if (!rb_enc_asciicompat(resenc)) resenc = rb_usascii_encoding();
7280 rb_enc_associate(result, resenc);
7281 str_buf_cat2(result,
"\"");
7289 n = rb_enc_precise_mbclen(p, pend, enc);
7291 if (p > prev) str_buf_cat(result, prev, p - prev);
7292 n = rb_enc_mbminlen(enc);
7294 n = (int)(pend - p);
7296 snprintf(buf, CHAR_ESC_LEN,
"\\x%02X", *p & 0377);
7297 str_buf_cat(result, buf, strlen(buf));
7303 c = rb_enc_mbc_to_codepoint(p, pend, enc);
7305 if ((asciicompat || unicode_p) &&
7306 (c ==
'"'|| c ==
'\\' ||
7311 (cc ==
'$' || cc ==
'@' || cc ==
'{'))))) {
7312 if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
7313 str_buf_cat2(result,
"\\");
7314 if (asciicompat || enc == resenc) {
7320 case '\n': cc =
'n';
break;
7321 case '\r': cc =
'r';
break;
7322 case '\t': cc =
't';
break;
7323 case '\f': cc =
'f';
break;
7324 case '\013': cc =
'v';
break;
7325 case '\010': cc =
'b';
break;
7326 case '\007': cc =
'a';
break;
7327 case 033: cc =
'e';
break;
7328 default: cc = 0;
break;
7331 if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
7334 str_buf_cat(result, buf, 2);
7347 (asciicompat && rb_enc_isascii(c, enc) &&
ISPRINT(c))) {
7351 if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
7352 rb_str_buf_cat_escaped_char(result, c, unicode_p);
7357 if (p > prev) str_buf_cat(result, prev, p - prev);
7358 str_buf_cat2(result,
"\"");
/*
 * True when p has not reached e and the byte at p is a dollar sign, an at
 * sign or an opening brace.  The dump code in this file evaluates it on the
 * byte that follows a hash character.
 * Both arguments appear more than once in the expansion (p up to four
 * times), so they must be free of side effects.
 */
7363#define IS_EVSTR(p,e) ((p) < (e) && (*(p) == '$' || *(p) == '@' || *(p) == '{'))
7376 int encidx = rb_enc_get_index(str);
7379 const char *p, *pend;
7382 int u8 = (encidx == rb_utf8_encindex());
7383 static const char nonascii_suffix[] =
".dup.force_encoding(\"%s\")";
7386 if (!rb_enc_asciicompat(enc)) {
7388 len += strlen(enc->name);
7391 p = RSTRING_PTR(str); pend = p + RSTRING_LEN(str);
7394 unsigned char c = *p++;
7397 case '"':
case '\\':
7398 case '\n':
case '\r':
7399 case '\t':
case '\f':
7400 case '\013':
case '\010':
case '\007':
case '\033':
7405 clen = IS_EVSTR(p, pend) ? 2 : 1;
7413 if (u8 && c > 0x7F) {
7414 int n = rb_enc_precise_mbclen(p-1, pend, enc);
7416 unsigned int cc = rb_enc_mbc_to_codepoint(p-1, pend, enc);
7419 else if (cc <= 0xFFFFF)
7432 if (clen > LONG_MAX -
len) {
7439 p = RSTRING_PTR(str); pend = p + RSTRING_LEN(str);
7440 q = RSTRING_PTR(result); qend = q +
len + 1;
7444 unsigned char c = *p++;
7446 if (c ==
'"' || c ==
'\\') {
7450 else if (c ==
'#') {
7451 if (IS_EVSTR(p, pend)) *q++ =
'\\';
7454 else if (c ==
'\n') {
7458 else if (c ==
'\r') {
7462 else if (c ==
'\t') {
7466 else if (c ==
'\f') {
7470 else if (c ==
'\013') {
7474 else if (c ==
'\010') {
7478 else if (c ==
'\007') {
7482 else if (c ==
'\033') {
7492 int n = rb_enc_precise_mbclen(p-1, pend, enc) - 1;
7494 int cc = rb_enc_mbc_to_codepoint(p-1, pend, enc);
7497 snprintf(q, qend-q,
"u%04X", cc);
7499 snprintf(q, qend-q,
"u{%X}", cc);
7504 snprintf(q, qend-q,
"x%02X", c);
7510 if (!rb_enc_asciicompat(enc)) {
7511 snprintf(q, qend-q, nonascii_suffix, enc->name);
7512 encidx = rb_ascii8bit_encindex();
7515 rb_enc_associate_index(result, encidx);
7521unescape_ascii(
unsigned int c)
7545undump_after_backslash(
VALUE undumped,
const char **ss,
const char *s_end,
rb_encoding **penc,
bool *utf8,
bool *binary)
7547 const char *s = *ss;
7551 unsigned char buf[6];
7569 *buf = unescape_ascii(*s);
7581 if (enc_utf8 == NULL) enc_utf8 = rb_utf8_encoding();
7582 if (*penc != enc_utf8) {
7584 rb_enc_associate(undumped, enc_utf8);
7601 if (hexlen == 0 || hexlen > 6) {
7607 if (0xd800 <= c && c <= 0xdfff) {
7610 codelen = rb_enc_mbcput(c, (
char *)buf, *penc);
7620 if (0xd800 <= c && c <= 0xdfff) {
7623 codelen = rb_enc_mbcput(c, (
char *)buf, *penc);
7653static VALUE rb_str_is_ascii_only_p(
VALUE str);
7665str_undump(
VALUE str)
7667 const char *s = RSTRING_PTR(str);
7670 VALUE undumped = rb_enc_str_new(s, 0L, enc);
7672 bool binary =
false;
7676 if (rb_str_is_ascii_only_p(str) ==
Qfalse) {
7679 if (!str_null_check(str, &w)) {
7682 if (RSTRING_LEN(str) < 2)
goto invalid_format;
7683 if (*s !=
'"')
goto invalid_format;
7701 static const char force_encoding_suffix[] =
".force_encoding(\"";
7702 static const char dup_suffix[] =
".dup";
7703 const char *encname;
7708 size =
sizeof(dup_suffix) - 1;
7709 if (s_end - s > size && memcmp(s, dup_suffix, size) == 0) s += size;
7711 size =
sizeof(force_encoding_suffix) - 1;
7712 if (s_end - s <= size)
goto invalid_format;
7713 if (memcmp(s, force_encoding_suffix, size) != 0)
goto invalid_format;
7717 rb_raise(
rb_eRuntimeError,
"dumped string contained Unicode escape but used force_encoding");
7721 s = memchr(s,
'"', s_end-s);
7723 if (!s)
goto invalid_format;
7724 if (s_end - s != 2)
goto invalid_format;
7725 if (s[0] !=
'"' || s[1] !=
')')
goto invalid_format;
7727 encidx = rb_enc_find_index2(encname, (
long)size);
7731 rb_enc_associate_index(undumped, encidx);
7741 undump_after_backslash(undumped, &s, s_end, &enc, &utf8, &binary);
7752 rb_raise(
rb_eRuntimeError,
"invalid dumped string; not wrapped with '\"' nor '\"...\".force_encoding(\"...\")' form");
7758 if (rb_enc_dummy_p(enc)) {
7765str_true_enc(
VALUE str)
7768 rb_str_check_dummy_enc(enc);
/*
 * Folds the case-mapping option symbols found in argv into +flags+.
 *
 * Recognised first options and the bits they add:
 *   sym_turkic      -> ONIGENC_CASE_FOLD_TURKISH_AZERI
 *   sym_lithuanian  -> ONIGENC_CASE_FOLD_LITHUANIAN
 *   sym_ascii       -> ONIGENC_CASE_ASCII_ONLY
 *   sym_fold        -> switches a downcase request to case folding
 * sym_turkic and sym_lithuanian may be combined, in either order, as first
 * and second option.
 *
 * Raises ArgumentError with one of the messages below for an excess,
 * unknown or misplaced option.
 *
 * NOTE(review): the argc tests guarding each branch and the final return
 * are not visible in this excerpt; callers assign the result back to their
 * flags variable, so the updated +flags+ is presumably returned -- confirm.
 */
7772static OnigCaseFoldType
7773check_case_options(
int argc,
VALUE *argv, OnigCaseFoldType flags)
7778 rb_raise(rb_eArgError,
"too many options");
7779 if (argv[0]==sym_turkic) {
7780 flags |= ONIGENC_CASE_FOLD_TURKISH_AZERI;
7782 if (argv[1]==sym_lithuanian)
7783 flags |= ONIGENC_CASE_FOLD_LITHUANIAN;
7785 rb_raise(rb_eArgError,
"invalid second option");
7788 else if (argv[0]==sym_lithuanian) {
7789 flags |= ONIGENC_CASE_FOLD_LITHUANIAN;
7791 if (argv[1]==sym_turkic)
7792 flags |= ONIGENC_CASE_FOLD_TURKISH_AZERI;
7794 rb_raise(rb_eArgError,
"invalid second option");
7798 rb_raise(rb_eArgError,
"too many options");
7799 else if (argv[0]==sym_ascii)
7800 flags |= ONIGENC_CASE_ASCII_ONLY;
7801 else if (argv[0]==sym_fold) {
/* accepted only when DOWNCASE is set and UPCASE is not */
7802 if ((flags & (ONIGENC_CASE_UPCASE|ONIGENC_CASE_DOWNCASE)) == ONIGENC_CASE_DOWNCASE)
/* XOR toggles both bits; DOWNCASE is known to be set here, so it is cleared */
7803 flags ^= ONIGENC_CASE_FOLD|ONIGENC_CASE_DOWNCASE;
7805 rb_raise(rb_eArgError,
"option :fold only allowed for downcasing");
7808 rb_raise(rb_eArgError,
"invalid option");
7815 if ((flags & ONIGENC_CASE_ASCII_ONLY) && (enc==rb_utf8_encoding() ||
rb_enc_mbmaxlen(enc) == 1))
/*
 * Fixed number of extra bytes added to the computed capacity of every
 * case-mapping buffer that is allocated.
 */
7821#define CASE_MAPPING_ADDITIONAL_LENGTH 20
/*
 * Diagnostic switch, 0 unless already defined.  When nonzero the
 * case-mapping code prints buffer statistics to stderr.
 */
7822#ifndef CASEMAP_DEBUG
7823# define CASEMAP_DEBUG 0
7831 OnigUChar space[FLEX_ARY_LEN];
/*
 * Releases a chain of mapping buffers linked through their +next+ field.
 *
 * Each node is freed with ruby_xfree_sized(); the size passed is the offset
 * of the +space+ member within mapping_buffer plus the node's recorded
 * +capa+.
 *
 * NOTE(review): how current_buffer is derived from +p+ is not visible in
 * this excerpt; presumably +p+ is the head of the chain -- confirm.
 */
7835mapping_buffer_free(
void *p)
7839 while (current_buffer) {
/* step to the next node first: the current one is released right below */
7840 previous_buffer = current_buffer;
7841 current_buffer = current_buffer->next;
7842 ruby_xfree_sized(previous_buffer, offsetof(
mapping_buffer, space) + previous_buffer->capa);
7848 {0, mapping_buffer_free,},
7857 const OnigUChar *source_current, *source_end;
7858 int target_length = 0;
7859 VALUE buffer_anchor;
7862 size_t buffer_count = 0;
7863 int buffer_length_or_invalid;
7865 if (RSTRING_LEN(source) == 0)
return str_duplicate(
rb_cString, source);
7867 source_current = (OnigUChar*)RSTRING_PTR(source);
7872 while (source_current < source_end) {
7874 size_t capa = (size_t)(source_end-source_current)*++buffer_count + CASE_MAPPING_ADDITIONAL_LENGTH;
7875 if (CASEMAP_DEBUG) {
7876 fprintf(stderr,
"Buffer allocation, capa is %"PRIuSIZE
"\n",
capa);
7879 *pre_buffer = current_buffer;
7880 pre_buffer = ¤t_buffer->next;
7881 current_buffer->next = NULL;
7882 current_buffer->capa =
capa;
7883 buffer_length_or_invalid = enc->case_map(flags,
7884 &source_current, source_end,
7885 current_buffer->space,
7886 current_buffer->space+current_buffer->capa,
7888 if (buffer_length_or_invalid < 0) {
7889 current_buffer =
DATA_PTR(buffer_anchor);
7891 mapping_buffer_free(current_buffer);
7892 rb_raise(rb_eArgError,
"input string invalid");
7894 target_length += current_buffer->used = buffer_length_or_invalid;
7896 if (CASEMAP_DEBUG) {
7897 fprintf(stderr,
"Buffer count is %"PRIuSIZE
"\n", buffer_count);
7900 if (buffer_count==1) {
7901 target =
rb_str_new((
const char*)current_buffer->space, target_length);
7904 char *target_current;
7907 target_current = RSTRING_PTR(target);
7908 current_buffer =
DATA_PTR(buffer_anchor);
7909 while (current_buffer) {
7910 memcpy(target_current, current_buffer->space, current_buffer->used);
7911 target_current += current_buffer->used;
7912 current_buffer = current_buffer->next;
7915 current_buffer =
DATA_PTR(buffer_anchor);
7917 mapping_buffer_free(current_buffer);
7922 str_enc_copy_direct(target, source);
7931 const OnigUChar *source_current, *source_end;
7932 OnigUChar *target_current, *target_end;
7933 long old_length = RSTRING_LEN(source);
7934 int length_or_invalid;
7936 if (old_length == 0)
return Qnil;
7938 source_current = (OnigUChar*)RSTRING_PTR(source);
7940 if (source == target) {
7941 target_current = (OnigUChar*)source_current;
7942 target_end = (OnigUChar*)source_end;
7945 target_current = (OnigUChar*)RSTRING_PTR(target);
7949 length_or_invalid = onigenc_ascii_only_case_map(flags,
7950 &source_current, source_end,
7951 target_current, target_end, enc);
7952 if (length_or_invalid < 0)
7953 rb_raise(rb_eArgError,
"input string invalid");
7954 if (CASEMAP_DEBUG && length_or_invalid != old_length) {
7955 fprintf(stderr,
"problem with rb_str_ascii_casemap"
7956 "; old_length=%ld, new_length=%d\n", old_length, length_or_invalid);
7957 rb_raise(rb_eArgError,
"internal problem with rb_str_ascii_casemap"
7958 "; old_length=%ld, new_length=%d\n", old_length, length_or_invalid);
7961 str_enc_copy(target, source);
/*
 * Byte-wise ASCII upcasing of +str+ in place.
 *
 * Works on the bytes between RSTRING_PTR(str) and RSTRING_END(str); a byte
 * in the range a..z is overwritten with the corresponding A..Z byte.  Each
 * byte is read as unsigned char before the range test.
 *
 * NOTE(review): the loop header and the return are not visible in this
 * excerpt; +modified+ starts out false and is presumably set when a byte
 * changes and then returned (callers test the result to set
 * ONIGENC_CASE_MODIFIED) -- confirm.
 */
7967upcase_single(
VALUE str)
7969 char *s = RSTRING_PTR(str), *send =
RSTRING_END(str);
7970 bool modified =
false;
7973 unsigned int c = *(
unsigned char*)s;
7975 if (
'a' <= c && c <=
'z') {
7976 *s =
'A' + (c -
'a');
/*
 * In-place upcase of +str+.
 *
 * Starts from ONIGENC_CASE_UPCASE, merges the caller's options through
 * check_case_options(), prepares the receiver with str_modify_keep_cr() and
 * then uses one of three strategies:
 *   1. upcase_single() when case_option_single_p() says the byte-wise path
 *      applies; ONIGENC_CASE_MODIFIED is set if it reports a change;
 *   2. rb_str_ascii_casemap() with +str+ as both source and target when
 *      ONIGENC_CASE_ASCII_ONLY is requested;
 *   3. otherwise the contents are replaced by the string produced by
 *      rb_str_casemap().
 * Returns +str+ when ONIGENC_CASE_MODIFIED is set in the final flags.
 *
 * NOTE(review): the value returned when nothing was modified is not visible
 * in this excerpt -- confirm.
 */
7997rb_str_upcase_bang(
int argc,
VALUE *argv,
VALUE str)
8000 OnigCaseFoldType flags = ONIGENC_CASE_UPCASE;
8002 flags = check_case_options(argc, argv, flags);
8003 str_modify_keep_cr(str);
8004 enc = str_true_enc(str);
8005 if (case_option_single_p(flags, enc, str)) {
8006 if (upcase_single(str))
8007 flags |= ONIGENC_CASE_MODIFIED;
8009 else if (flags&ONIGENC_CASE_ASCII_ONLY)
8010 rb_str_ascii_casemap(str, str, &flags, enc);
8012 str_shared_replace(str, rb_str_casemap(str, &flags, enc));
8014 if (ONIGENC_CASE_MODIFIED&flags)
return str;
8027rb_str_upcase(
int argc,
VALUE *argv,
VALUE str)
8030 OnigCaseFoldType flags = ONIGENC_CASE_UPCASE;
8033 flags = check_case_options(argc, argv, flags);
8034 enc = str_true_enc(str);
8035 if (case_option_single_p(flags, enc, str)) {
8036 ret =
rb_str_new(RSTRING_PTR(str), RSTRING_LEN(str));
8037 str_enc_copy_direct(ret, str);
8040 else if (flags&ONIGENC_CASE_ASCII_ONLY) {
8042 rb_str_ascii_casemap(str, ret, &flags, enc);
8045 ret = rb_str_casemap(str, &flags, enc);
/*
 * Byte-wise ASCII downcasing of +str+ in place.
 *
 * Works on the bytes between RSTRING_PTR(str) and RSTRING_END(str); a byte
 * in the range A..Z is overwritten with the corresponding a..z byte.  Each
 * byte is read as unsigned char before the range test.
 *
 * NOTE(review): the loop header and the return are not visible in this
 * excerpt; +modified+ starts out false and is presumably set when a byte
 * changes and then returned (callers test the result to set
 * ONIGENC_CASE_MODIFIED) -- confirm.
 */
8052downcase_single(
VALUE str)
8054 char *s = RSTRING_PTR(str), *send =
RSTRING_END(str);
8055 bool modified =
false;
8058 unsigned int c = *(
unsigned char*)s;
8060 if (
'A' <= c && c <=
'Z') {
8061 *s =
'a' + (c -
'A');
/*
 * In-place downcase of +str+.
 *
 * Starts from ONIGENC_CASE_DOWNCASE, merges the caller's options through
 * check_case_options(), prepares the receiver with str_modify_keep_cr() and
 * then uses one of three strategies:
 *   1. downcase_single() when case_option_single_p() says the byte-wise
 *      path applies; ONIGENC_CASE_MODIFIED is set if it reports a change;
 *   2. rb_str_ascii_casemap() with +str+ as both source and target when
 *      ONIGENC_CASE_ASCII_ONLY is requested;
 *   3. otherwise the contents are replaced by the string produced by
 *      rb_str_casemap().
 * Returns +str+ when ONIGENC_CASE_MODIFIED is set in the final flags.
 *
 * NOTE(review): the value returned when nothing was modified is not visible
 * in this excerpt -- confirm.
 */
8083rb_str_downcase_bang(
int argc,
VALUE *argv,
VALUE str)
8086 OnigCaseFoldType flags = ONIGENC_CASE_DOWNCASE;
8088 flags = check_case_options(argc, argv, flags);
8089 str_modify_keep_cr(str);
8090 enc = str_true_enc(str);
8091 if (case_option_single_p(flags, enc, str)) {
8092 if (downcase_single(str))
8093 flags |= ONIGENC_CASE_MODIFIED;
8095 else if (flags&ONIGENC_CASE_ASCII_ONLY)
8096 rb_str_ascii_casemap(str, str, &flags, enc);
8098 str_shared_replace(str, rb_str_casemap(str, &flags, enc));
8100 if (ONIGENC_CASE_MODIFIED&flags)
return str;
8114rb_str_downcase(
int argc,
VALUE *argv,
VALUE str)
8117 OnigCaseFoldType flags = ONIGENC_CASE_DOWNCASE;
8120 flags = check_case_options(argc, argv, flags);
8121 enc = str_true_enc(str);
8122 if (case_option_single_p(flags, enc, str)) {
8123 ret =
rb_str_new(RSTRING_PTR(str), RSTRING_LEN(str));
8124 str_enc_copy_direct(ret, str);
8125 downcase_single(ret);
8127 else if (flags&ONIGENC_CASE_ASCII_ONLY) {
8129 rb_str_ascii_casemap(str, ret, &flags, enc);
8132 ret = rb_str_casemap(str, &flags, enc);
8152rb_str_capitalize_bang(
int argc,
VALUE *argv,
VALUE str)
8155 OnigCaseFoldType flags = ONIGENC_CASE_UPCASE | ONIGENC_CASE_TITLECASE;
8157 flags = check_case_options(argc, argv, flags);
8158 str_modify_keep_cr(str);
8159 enc = str_true_enc(str);
8160 if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str))
return Qnil;
8161 if (flags&ONIGENC_CASE_ASCII_ONLY)
8162 rb_str_ascii_casemap(str, str, &flags, enc);
8164 str_shared_replace(str, rb_str_casemap(str, &flags, enc));
8166 if (ONIGENC_CASE_MODIFIED&flags)
return str;
8180rb_str_capitalize(
int argc,
VALUE *argv,
VALUE str)
8183 OnigCaseFoldType flags = ONIGENC_CASE_UPCASE | ONIGENC_CASE_TITLECASE;
8186 flags = check_case_options(argc, argv, flags);
8187 enc = str_true_enc(str);
8188 if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str))
return str;
8189 if (flags&ONIGENC_CASE_ASCII_ONLY) {
8191 rb_str_ascii_casemap(str, ret, &flags, enc);
8194 ret = rb_str_casemap(str, &flags, enc);
8213rb_str_swapcase_bang(
int argc,
VALUE *argv,
VALUE str)
8216 OnigCaseFoldType flags = ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE;
8218 flags = check_case_options(argc, argv, flags);
8219 str_modify_keep_cr(str);
8220 enc = str_true_enc(str);
8221 if (flags&ONIGENC_CASE_ASCII_ONLY)
8222 rb_str_ascii_casemap(str, str, &flags, enc);
8224 str_shared_replace(str, rb_str_casemap(str, &flags, enc));
8226 if (ONIGENC_CASE_MODIFIED&flags)
return str;
8240rb_str_swapcase(
int argc,
VALUE *argv,
VALUE str)
8243 OnigCaseFoldType flags = ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE;
8246 flags = check_case_options(argc, argv, flags);
8247 enc = str_true_enc(str);
8248 if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str))
return str_duplicate(
rb_cString, str);
8249 if (flags&ONIGENC_CASE_ASCII_ONLY) {
8251 rb_str_ascii_casemap(str, ret, &flags, enc);
8254 ret = rb_str_casemap(str, &flags, enc);
8259typedef unsigned char *USTR;
8263 unsigned int now, max;
8275 if (t->p == t->pend)
return -1;
8276 if (rb_enc_ascget(t->p, t->pend, &n, enc) ==
'\\' && t->p + n < t->pend) {
8279 t->now = rb_enc_codepoint_len(t->p, t->pend, &n, enc);
8281 if (rb_enc_ascget(t->p, t->pend, &n, enc) ==
'-' && t->p + n < t->pend) {
8283 if (t->p < t->pend) {
8284 unsigned int c = rb_enc_codepoint_len(t->p, t->pend, &n, enc);
8287 if (t->now < 0x80 && c < 0x80) {
8288 rb_raise(rb_eArgError,
8289 "invalid range \"%c-%c\" in string transliteration",
8293 rb_raise(rb_eArgError,
"invalid range in string transliteration");
8297 else if (t->now < c) {
8306 while (ONIGENC_CODE_TO_MBCLEN(enc, ++t->now) <= 0) {
8307 if (t->now == t->max) {
8312 if (t->now < t->max) {
8328 const unsigned int errc = -1;
8329 unsigned int trans[256];
8331 struct tr trsrc, trrepl;
8333 unsigned int c, c0, last = 0;
8334 int modify = 0, i, l;
8335 unsigned char *s, *send;
8337 int singlebyte = single_byte_optimizable(str);
8341#define CHECK_IF_ASCII(c) \
8342 (void)((cr == ENC_CODERANGE_7BIT && !rb_isascii(c)) ? \
8343 (cr = ENC_CODERANGE_VALID) : 0)
8347 if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str))
return Qnil;
8348 if (RSTRING_LEN(repl) == 0) {
8349 return rb_str_delete_bang(1, &src, str);
8353 e1 = rb_enc_check(str, src);
8354 e2 = rb_enc_check(str, repl);
8359 enc = rb_enc_check(src, repl);
8361 trsrc.p = RSTRING_PTR(src); trsrc.pend = trsrc.p + RSTRING_LEN(src);
8362 if (RSTRING_LEN(src) > 1 &&
8363 rb_enc_ascget(trsrc.p, trsrc.pend, &l, enc) ==
'^' &&
8364 trsrc.p + l < trsrc.pend) {
8368 trrepl.p = RSTRING_PTR(repl);
8369 trrepl.pend = trrepl.p + RSTRING_LEN(repl);
8370 trsrc.gen = trrepl.gen = 0;
8371 trsrc.now = trrepl.now = 0;
8372 trsrc.max = trrepl.max = 0;
8375 for (i=0; i<256; i++) {
8378 while ((c = trnext(&trsrc, enc)) != errc) {
8383 if (!hash) hash = rb_hash_new();
8387 while ((c = trnext(&trrepl, enc)) != errc)
8390 for (i=0; i<256; i++) {
8391 if (trans[i] != errc) {
8399 for (i=0; i<256; i++) {
8402 while ((c = trnext(&trsrc, enc)) != errc) {
8403 r = trnext(&trrepl, enc);
8404 if (r == errc) r = trrepl.now;
8407 if (rb_enc_codelen(r, enc) != 1) singlebyte = 0;
8410 if (!hash) hash = rb_hash_new();
8418 str_modify_keep_cr(str);
8419 s = (
unsigned char *)RSTRING_PTR(str); send = (
unsigned char *)
RSTRING_END(str);
8420 termlen = rb_enc_mbminlen(enc);
8423 long offset, max = RSTRING_LEN(str);
8424 unsigned int save = -1;
8425 unsigned char *buf =
ALLOC_N(
unsigned char, max + termlen), *t = buf;
8430 int r = rb_enc_precise_mbclen((
char *)s, (
char *)send, e1);
8432 SIZED_FREE_N(buf, max + termlen);
8433 rb_raise(rb_eArgError,
"invalid byte sequence in %s", rb_enc_name(e1));
8436 c0 = c = rb_enc_mbc_to_codepoint((
char *)s, (
char *)send, e1);
8438 tlen = enc == e1 ? clen : rb_enc_codelen(c, enc);
8447 if (cflag) c = last;
8450 else if (cflag) c = errc;
8456 if (c != (
unsigned int)-1) {
8462 tlen = rb_enc_codelen(c, enc);
8468 if (enc != e1) may_modify = 1;
8470 if ((offset = t - buf) + tlen > max) {
8471 size_t MAYBE_UNUSED(old) = max + termlen;
8472 max = offset + tlen + (send - s);
8473 SIZED_REALLOC_N(buf,
unsigned char, max + termlen, old);
8476 rb_enc_mbcput(c, t, enc);
8477 if (may_modify && memcmp(s, t, tlen) != 0) {
8483 if (!STR_EMBED_P(str)) {
8484 SIZED_FREE_N(STR_HEAP_PTR(str), STR_HEAP_SIZE(str));
8486 TERM_FILL((
char *)t, termlen);
8487 RSTRING(str)->as.heap.ptr = (
char *)buf;
8488 STR_SET_LEN(str, t - buf);
8489 STR_SET_NOEMBED(str);
8490 RSTRING(str)->as.heap.aux.capa = max;
8494 c = (
unsigned char)*s;
8495 if (trans[c] != errc) {
8512 long offset, max = (long)((send - s) * 1.2);
8513 unsigned char *buf =
ALLOC_N(
unsigned char, max + termlen), *t = buf;
8518 int r = rb_enc_precise_mbclen((
char *)s, (
char *)send, e1);
8520 SIZED_FREE_N(buf, max + termlen);
8521 rb_raise(rb_eArgError,
"invalid byte sequence in %s", rb_enc_name(e1));
8524 c0 = c = rb_enc_mbc_to_codepoint((
char *)s, (
char *)send, e1);
8526 tlen = enc == e1 ? clen : rb_enc_codelen(c, enc);
8534 if (cflag) c = last;
8537 else if (cflag) c = errc;
8541 c = cflag ? last : errc;
8544 tlen = rb_enc_codelen(c, enc);
8549 if (enc != e1) may_modify = 1;
8551 if ((offset = t - buf) + tlen > max) {
8552 size_t MAYBE_UNUSED(old) = max + termlen;
8553 max = offset + tlen + (long)((send - s) * 1.2);
8554 SIZED_REALLOC_N(buf,
unsigned char, max + termlen, old);
8558 rb_enc_mbcput(c, t, enc);
8559 if (may_modify && memcmp(s, t, tlen) != 0) {
8567 if (!STR_EMBED_P(str)) {
8568 SIZED_FREE_N(STR_HEAP_PTR(str), STR_HEAP_SIZE(str));
8570 TERM_FILL((
char *)t, termlen);
8571 RSTRING(str)->as.heap.ptr = (
char *)buf;
8572 STR_SET_LEN(str, t - buf);
8573 STR_SET_NOEMBED(str);
8574 RSTRING(str)->as.heap.aux.capa = max;
8580 rb_enc_associate(str, enc);
8602 return tr_trans(str, src, repl, 0);
8647 tr_trans(str, src, repl, 0);
/*
 * TR_TABLE_MAX is the number of distinct unsigned char values; code points
 * below it are looked up directly in the byte table.
 * TR_TABLE_SIZE adds one more slot: index TR_TABLE_MAX holds a flag that
 * tr_setup_table() writes and tr_find() reads as the fallback answer for
 * code points that are not below TR_TABLE_MAX.
 */
8651#define TR_TABLE_MAX (UCHAR_MAX+1)
8652#define TR_TABLE_SIZE (TR_TABLE_MAX+1)
8654tr_setup_table(
VALUE str,
char stable[TR_TABLE_SIZE],
int first,
8657 const unsigned int errc = -1;
8658 char buf[TR_TABLE_MAX];
8661 VALUE table = 0, ptable = 0;
8662 int i, l, cflag = 0;
8664 tr.p = RSTRING_PTR(str);
tr.pend =
tr.p + RSTRING_LEN(str);
8665 tr.gen =
tr.now =
tr.max = 0;
8667 if (RSTRING_LEN(str) > 1 && rb_enc_ascget(
tr.p,
tr.pend, &l, enc) ==
'^') {
8672 for (i=0; i<TR_TABLE_MAX; i++) {
8675 stable[TR_TABLE_MAX] = cflag;
8677 else if (stable[TR_TABLE_MAX] && !cflag) {
8678 stable[TR_TABLE_MAX] = 0;
8680 for (i=0; i<TR_TABLE_MAX; i++) {
8684 while ((c = trnext(&
tr, enc)) != errc) {
8685 if (c < TR_TABLE_MAX) {
8686 buf[(
unsigned char)c] = !cflag;
8691 if (!table && (first || *tablep || stable[TR_TABLE_MAX])) {
8694 table = ptable ? ptable : rb_hash_new();
8698 table = rb_hash_new();
8703 if (table && (!ptable || (cflag ^ !
NIL_P(rb_hash_aref(ptable, key))))) {
8704 rb_hash_aset(table, key,
Qtrue);
8708 for (i=0; i<TR_TABLE_MAX; i++) {
8709 stable[i] = stable[i] && buf[i];
8711 if (!table && !cflag) {
/*
 * Tells whether code point +c+ is selected by a character table.
 *
 * For c below TR_TABLE_MAX the answer is read from table[c].  For larger
 * code points the +del+ and +nodel+ hashes are consulted with the key v,
 * and when neither hash branch applies the flag stored at
 * table[TR_TABLE_MAX] decides.  +nodel+ may be 0, in which case it is not
 * looked up.
 *
 * NOTE(review): how v is built from +c+ and what the two hash branches
 * return are not visible in this excerpt; presumably a hit in +del+ that
 * is not also in +nodel+ selects the character, and a hit in +nodel+
 * alone rejects it -- confirm.
 */
8718tr_find(
unsigned int c,
const char table[TR_TABLE_SIZE],
VALUE del,
VALUE nodel)
8720 if (c < TR_TABLE_MAX) {
8721 return table[c] != 0;
8727 if (!
NIL_P(rb_hash_lookup(del, v)) &&
8728 (!nodel ||
NIL_P(rb_hash_lookup(nodel, v)))) {
8732 else if (nodel && !
NIL_P(rb_hash_lookup(nodel, v))) {
8735 return table[TR_TABLE_MAX] ? TRUE : FALSE;
8750rb_str_delete_bang(
int argc,
VALUE *argv,
VALUE str)
8752 char squeez[TR_TABLE_SIZE];
8755 VALUE del = 0, nodel = 0;
8757 int i, ascompat, cr;
8759 if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str))
return Qnil;
8761 for (i=0; i<argc; i++) {
8765 enc = rb_enc_check(str, s);
8766 tr_setup_table(s, squeez, i==0, &del, &nodel, enc);
8769 str_modify_keep_cr(str);
8770 ascompat = rb_enc_asciicompat(enc);
8771 s = t = RSTRING_PTR(str);
8778 if (ascompat && (c = *(
unsigned char*)s) < 0x80) {
8789 c = rb_enc_codepoint_len(s, send, &clen, enc);
8791 if (tr_find(c, squeez, del, nodel)) {
8795 if (t != s) rb_enc_mbcput(c, t, enc);
8802 TERM_FILL(t, TERM_LEN(str));
8803 STR_SET_LEN(str, t - RSTRING_PTR(str));
8806 if (modify)
return str;
8820rb_str_delete(
int argc,
VALUE *argv,
VALUE str)
8823 rb_str_delete_bang(argc, argv, str);
8841rb_str_squeeze_bang(
int argc,
VALUE *argv,
VALUE str)
8843 char squeez[TR_TABLE_SIZE];
8845 VALUE del = 0, nodel = 0;
8846 unsigned char *s, *send, *t;
8848 int ascompat, singlebyte = single_byte_optimizable(str);
8852 enc = STR_ENC_GET(str);
8855 for (i=0; i<argc; i++) {
8859 enc = rb_enc_check(str, s);
8860 if (singlebyte && !single_byte_optimizable(s))
8862 tr_setup_table(s, squeez, i==0, &del, &nodel, enc);
8866 str_modify_keep_cr(str);
8867 s = t = (
unsigned char *)RSTRING_PTR(str);
8868 if (!s || RSTRING_LEN(str) == 0)
return Qnil;
8871 ascompat = rb_enc_asciicompat(enc);
8875 unsigned int c = *s++;
8876 if (c != save || (argc > 0 && !squeez[c])) {
8886 if (ascompat && (c = *s) < 0x80) {
8887 if (c != save || (argc > 0 && !squeez[c])) {
8893 c = rb_enc_codepoint_len((
char *)s, (
char *)send, &clen, enc);
8895 if (c != save || (argc > 0 && !tr_find(c, squeez, del, nodel))) {
8896 if (t != s) rb_enc_mbcput(c, t, enc);
8905 TERM_FILL((
char *)t, TERM_LEN(str));
8906 if ((
char *)t - RSTRING_PTR(str) != RSTRING_LEN(str)) {
8907 STR_SET_LEN(str, (
char *)t - RSTRING_PTR(str));
8911 if (modify)
return str;
8925rb_str_squeeze(
int argc,
VALUE *argv,
VALUE str)
8928 rb_str_squeeze_bang(argc, argv, str);
8948 return tr_trans(str, src, repl, 1);
8976 tr_trans(str, src, repl, 1);
8989rb_str_count(
int argc,
VALUE *argv,
VALUE str)
8991 char table[TR_TABLE_SIZE];
8993 VALUE del = 0, nodel = 0, tstr;
9003 enc = rb_enc_check(str, tstr);
9006 if (RSTRING_LEN(tstr) == 1 && rb_enc_asciicompat(enc) &&
9007 (ptstr = RSTRING_PTR(tstr),
9008 ONIGENC_IS_ALLOWED_REVERSE_MATCH(enc, (
const unsigned char *)ptstr, (
const unsigned char *)ptstr+1)) &&
9009 !is_broken_string(str)) {
9011 unsigned char c = rb_enc_codepoint_len(ptstr, ptstr+1, &clen, enc);
9013 s = RSTRING_PTR(str);
9014 if (!s || RSTRING_LEN(str) == 0)
return INT2FIX(0);
9017 if (*(
unsigned char*)s++ == c) n++;
9023 tr_setup_table(tstr, table, TRUE, &del, &nodel, enc);
9024 for (i=1; i<argc; i++) {
9027 enc = rb_enc_check(str, tstr);
9028 tr_setup_table(tstr, table, FALSE, &del, &nodel, enc);
9031 s = RSTRING_PTR(str);
9032 if (!s || RSTRING_LEN(str) == 0)
return INT2FIX(0);
9034 ascompat = rb_enc_asciicompat(enc);
9038 if (ascompat && (c = *(
unsigned char*)s) < 0x80) {
9046 c = rb_enc_codepoint_len(s, send, &clen, enc);
9047 if (tr_find(c, table, del, nodel)) {
9058rb_fs_check(
VALUE val)
9062 if (
NIL_P(val))
return 0;
9067static const char isspacetable[256] = {
9068 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0,
9069 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9070 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9071 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9072 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9073 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9074 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9075 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9076 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9077 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9078 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9079 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9080 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9081 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9082 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9083 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
9086#define ascii_isspace(c) isspacetable[(unsigned char)(c)]
9089split_string(
VALUE result,
VALUE str,
long beg,
long len,
long empty_count)
9091 if (empty_count >= 0 &&
len == 0) {
9092 return empty_count + 1;
9094 if (empty_count > 0) {
9099 }
while (--empty_count > 0);
9103 rb_yield(str_new_empty_String(str));
9104 }
while (--empty_count > 0);
9118 SPLIT_TYPE_AWK, SPLIT_TYPE_STRING, SPLIT_TYPE_REGEXP, SPLIT_TYPE_CHARS
9122literal_split_pattern(
VALUE spat, split_type_t default_type)
9130 return SPLIT_TYPE_CHARS;
9132 else if (rb_enc_asciicompat(enc)) {
9133 if (
len == 1 && ptr[0] ==
' ') {
9134 return SPLIT_TYPE_AWK;
9139 if (rb_enc_ascget(ptr, ptr +
len, &l, enc) ==
' ' &&
len == l) {
9140 return SPLIT_TYPE_AWK;
9143 return default_type;
9156rb_str_split_m(
int argc,
VALUE *argv,
VALUE str)
9161 split_type_t split_type;
9162 long beg, end, i = 0, empty_count = -1;
9167 if (
rb_scan_args(argc, argv,
"02", &spat, &limit) == 2) {
9169 if (lim <= 0) limit =
Qnil;
9170 else if (lim == 1) {
9171 if (RSTRING_LEN(str) == 0)
9182 if (
NIL_P(limit) && !lim) empty_count = 0;
9184 enc = STR_ENC_GET(str);
9185 split_type = SPLIT_TYPE_REGEXP;
9187 spat = get_pat_quoted(spat, 0);
9189 else if (
NIL_P(spat = rb_fs)) {
9190 split_type = SPLIT_TYPE_AWK;
9192 else if (!(spat = rb_fs_check(spat))) {
9193 rb_raise(
rb_eTypeError,
"value of $; must be String or Regexp");
9198 if (split_type != SPLIT_TYPE_AWK) {
9203 split_type = literal_split_pattern(tmp, SPLIT_TYPE_REGEXP);
9204 if (split_type == SPLIT_TYPE_AWK) {
9206 split_type = SPLIT_TYPE_STRING;
9211 mustnot_broken(spat);
9212 split_type = literal_split_pattern(spat, SPLIT_TYPE_STRING);
9220#define SPLIT_STR(beg, len) ( \
9221 empty_count = split_string(result, str, beg, len, empty_count), \
9222 str_mod_check(str, str_start, str_len))
9225 char *ptr = RSTRING_PTR(str);
9226 char *
const str_start = ptr;
9227 const long str_len = RSTRING_LEN(str);
9228 char *
const eptr = str_start + str_len;
9229 if (split_type == SPLIT_TYPE_AWK) {
9236 if (is_ascii_string(str)) {
9237 while (ptr < eptr) {
9238 c = (
unsigned char)*ptr++;
9240 if (ascii_isspace(c)) {
9246 if (!
NIL_P(limit) && lim <= i)
break;
9249 else if (ascii_isspace(c)) {
9250 SPLIT_STR(beg, end-beg);
9253 if (!
NIL_P(limit)) ++i;
9261 while (ptr < eptr) {
9264 c = rb_enc_codepoint_len(ptr, eptr, &n, enc);
9273 if (!
NIL_P(limit) && lim <= i)
break;
9277 SPLIT_STR(beg, end-beg);
9280 if (!
NIL_P(limit)) ++i;
9288 else if (split_type == SPLIT_TYPE_STRING) {
9289 char *substr_start = ptr;
9290 char *sptr = RSTRING_PTR(spat);
9291 long slen = RSTRING_LEN(spat);
9294 mustnot_broken(str);
9295 enc = rb_enc_check(str, spat);
9296 while (ptr < eptr &&
9297 (end =
rb_memsearch(sptr, slen, ptr, eptr - ptr, enc)) >= 0) {
9300 if (t != ptr + end) {
9304 SPLIT_STR(substr_start - str_start, (ptr+end) - substr_start);
9305 str_mod_check(spat, sptr, slen);
9308 if (!
NIL_P(limit) && lim <= ++i)
break;
9310 beg = ptr - str_start;
9312 else if (split_type == SPLIT_TYPE_CHARS) {
9316 mustnot_broken(str);
9317 enc = rb_enc_get(str);
9318 while (ptr < eptr &&
9319 (n = rb_enc_precise_mbclen(ptr, eptr, enc)) > 0) {
9320 SPLIT_STR(ptr - str_start, n);
9322 if (!
NIL_P(limit) && lim <= ++i)
break;
9324 beg = ptr - str_start;
9328 long len = RSTRING_LEN(str);
9335 (match ? (rb_match_unbusy(match),
rb_backref_set(match)) : (void)0)) {
9338 end = RMATCH_BEG(match, 0);
9339 if (start == end && RMATCH_BEG(match, 0) == RMATCH_END(match, 0)) {
9344 else if (last_null == 1) {
9345 SPLIT_STR(beg, rb_enc_fast_mbclen(ptr+beg, eptr, enc));
9352 start += rb_enc_fast_mbclen(ptr+start,eptr,enc);
9358 SPLIT_STR(beg, end-beg);
9359 beg = start = RMATCH_END(match, 0);
9363 for (idx = 1; idx < RMATCH_NREGS(match); idx++) {
9364 if (RMATCH_BEG(match, idx) == -1)
continue;
9365 SPLIT_STR(RMATCH_BEG(match, idx), RMATCH_END(match, idx) - RMATCH_BEG(match, idx));
9367 if (!
NIL_P(limit) && lim <= ++i)
break;
9369 if (match) rb_match_unbusy(match);
9371 if (RSTRING_LEN(str) > 0 && (!
NIL_P(limit) || RSTRING_LEN(str) > beg || lim < 0)) {
9372 SPLIT_STR(beg, RSTRING_LEN(str)-beg);
9375 return result ? result : str;
9385 return rb_str_split_m(1, &sep, str);
9388#define WANTARRAY(m, size) (!rb_block_given_p() ? rb_ary_new_capa(size) : 0)
9403#define ENUM_ELEM(ary, e) enumerator_element(ary, e)
9406chomp_newline(
const char *p,
const char *e,
rb_encoding *enc)
9408 const char *prev = rb_enc_prev_char(p, e, e, enc);
9411 prev = rb_enc_prev_char(p, e, e, enc);
9412 if (prev && rb_enc_ascget(prev, e, NULL, enc) ==
'\r')
9424 RSTRING_LEN(rs) != 1 ||
9425 RSTRING_PTR(rs)[0] !=
'\n')) {
9431#define rb_rs get_rs()
9438 const char *ptr, *pend, *subptr, *subend, *rsptr, *hit, *adjusted;
9439 long pos,
len, rslen;
9445 static ID keywords[1];
9450 chomp = (!UNDEF_P(chomp) &&
RTEST(chomp));
9454 if (!ENUM_ELEM(ary, str)) {
9462 if (!RSTRING_LEN(str))
goto end;
9464 ptr = subptr = RSTRING_PTR(str);
9466 len = RSTRING_LEN(str);
9468 rslen = RSTRING_LEN(rs);
9471 enc = rb_enc_get(str);
9473 enc = rb_enc_check(str, rs);
9478 const char *eol = NULL;
9480 while (subend < pend) {
9481 long chomp_rslen = 0;
9483 if (rb_enc_ascget(subend, pend, &n, enc) !=
'\r')
9485 rslen = n + rb_enc_mbclen(subend + n, pend, enc);
9487 if (eol == subend)
break;
9491 chomp_rslen = -rslen;
9495 if (!subptr) subptr = subend;
9499 }
while (subend < pend);
9501 if (rslen == 0) chomp_rslen = 0;
9503 subend - subptr + (chomp ? chomp_rslen : rslen));
9504 if (ENUM_ELEM(ary, line)) {
9505 str_mod_check(str, ptr,
len);
9507 subptr = eol = NULL;
9512 rsptr = RSTRING_PTR(rs);
9513 if (RSTRING_LEN(rs) == rb_enc_mbminlen(enc) &&
9522 rsptr = RSTRING_PTR(rs);
9523 rslen = RSTRING_LEN(rs);
9526 while (subptr < pend) {
9527 pos =
rb_memsearch(rsptr, rslen, subptr, pend - subptr, enc);
9531 if (hit != adjusted) {
9535 subend = hit += rslen;
9538 subend = chomp_newline(subptr, subend, enc);
9545 if (ENUM_ELEM(ary, line)) {
9546 str_mod_check(str, ptr,
len);
9551 if (subptr != pend) {
9554 pend = chomp_newline(subptr, pend, enc);
9556 else if (pend - subptr >= rslen &&
9557 memcmp(pend - rslen, rsptr, rslen) == 0) {
9562 ENUM_ELEM(ary, line);
9583rb_str_each_line(
int argc,
VALUE *argv,
VALUE str)
9586 return rb_str_enumerate_lines(argc, argv, str, 0);
9641rb_str_lines(
int argc,
VALUE *argv,
VALUE str)
9643 VALUE ary = WANTARRAY(
"lines", 0);
9644 return rb_str_enumerate_lines(argc, argv, str, ary);
9658 for (i=0; i<RSTRING_LEN(str); i++) {
9659 ENUM_ELEM(ary,
INT2FIX((
unsigned char)RSTRING_PTR(str)[i]));
9677rb_str_each_byte(
VALUE str)
9680 return rb_str_enumerate_bytes(str, 0);
9692rb_str_bytes(
VALUE str)
9694 VALUE ary = WANTARRAY(
"bytes", RSTRING_LEN(str));
9695 return rb_str_enumerate_bytes(str, ary);
9713 ptr = RSTRING_PTR(str);
9714 len = RSTRING_LEN(str);
9715 enc = rb_enc_get(str);
9718 for (i = 0; i <
len; i += n) {
9719 n = rb_enc_fast_mbclen(ptr + i, ptr +
len, enc);
9724 for (i = 0; i <
len; i += n) {
9725 n = rb_enc_mbclen(ptr + i, ptr +
len, enc);
9746rb_str_each_char(
VALUE str)
9749 return rb_str_enumerate_chars(str, 0);
9761rb_str_chars(
VALUE str)
9764 return rb_str_enumerate_chars(str, ary);
9768rb_str_enumerate_codepoints(
VALUE str,
VALUE ary)
9773 const char *ptr, *end;
9776 if (single_byte_optimizable(str))
9777 return rb_str_enumerate_bytes(str, ary);
9780 ptr = RSTRING_PTR(str);
9782 enc = STR_ENC_GET(str);
9785 c = rb_enc_codepoint_len(ptr, end, &n, enc);
9806rb_str_each_codepoint(
VALUE str)
9809 return rb_str_enumerate_codepoints(str, 0);
9821rb_str_codepoints(
VALUE str)
9824 return rb_str_enumerate_codepoints(str, ary);
9830 int encidx = rb_enc_to_index(enc);
9832 const OnigUChar source_ascii[] =
"\\X";
9833 const OnigUChar *source = source_ascii;
9834 size_t source_len =
sizeof(source_ascii) - 1;
9837#define CHARS_16BE(x) (OnigUChar)((x)>>8), (OnigUChar)(x)
9838#define CHARS_16LE(x) (OnigUChar)(x), (OnigUChar)((x)>>8)
9839#define CHARS_32BE(x) CHARS_16BE((x)>>16), CHARS_16BE(x)
9840#define CHARS_32LE(x) CHARS_16LE(x), CHARS_16LE((x)>>16)
9841#define CASE_UTF(e) \
9842 case ENCINDEX_UTF_##e: { \
9843 static const OnigUChar source_UTF_##e[] = {CHARS_##e('\\'), CHARS_##e('X')}; \
9844 source = source_UTF_##e; \
9845 source_len = sizeof(source_UTF_##e); \
9848 CASE_UTF(16BE); CASE_UTF(16LE); CASE_UTF(32BE); CASE_UTF(32LE);
9856 regex_t *reg_grapheme_cluster;
9858 int r = onig_new(®_grapheme_cluster, source, source + source_len,
9859 ONIG_OPTION_DEFAULT, enc, OnigDefaultSyntax, &einfo);
9861 UChar message[ONIG_MAX_ERROR_MESSAGE_LEN];
9862 onig_error_code_to_str(message, r, &einfo);
9863 rb_fatal(
"cannot compile grapheme cluster regexp: %s", (
char *)message);
9866 return reg_grapheme_cluster;
9872 int encidx = rb_enc_to_index(enc);
9873 static regex_t *reg_grapheme_cluster_utf8 = NULL;
9875 if (encidx == rb_utf8_encindex()) {
9876 if (!reg_grapheme_cluster_utf8) {
9877 reg_grapheme_cluster_utf8 = get_reg_grapheme_cluster(enc);
9880 return reg_grapheme_cluster_utf8;
9889 size_t grapheme_cluster_count = 0;
9891 const char *ptr, *end;
9893 if (!rb_enc_unicode_p(enc)) {
9897 bool cached_reg_grapheme_cluster =
true;
9898 regex_t *reg_grapheme_cluster = get_cached_reg_grapheme_cluster(enc);
9899 if (!reg_grapheme_cluster) {
9900 reg_grapheme_cluster = get_reg_grapheme_cluster(enc);
9901 cached_reg_grapheme_cluster =
false;
9904 ptr = RSTRING_PTR(str);
9908 OnigPosition
len = onig_match(reg_grapheme_cluster,
9909 (
const OnigUChar *)ptr, (
const OnigUChar *)end,
9910 (
const OnigUChar *)ptr, NULL, 0);
9911 if (
len <= 0)
break;
9912 grapheme_cluster_count++;
9916 if (!cached_reg_grapheme_cluster) {
9917 onig_free(reg_grapheme_cluster);
9920 return SIZET2NUM(grapheme_cluster_count);
9924rb_str_enumerate_grapheme_clusters(
VALUE str,
VALUE ary)
9928 const char *ptr0, *ptr, *end;
9930 if (!rb_enc_unicode_p(enc)) {
9931 return rb_str_enumerate_chars(str, ary);
9936 bool cached_reg_grapheme_cluster =
true;
9937 regex_t *reg_grapheme_cluster = get_cached_reg_grapheme_cluster(enc);
9938 if (!reg_grapheme_cluster) {
9939 reg_grapheme_cluster = get_reg_grapheme_cluster(enc);
9940 cached_reg_grapheme_cluster =
false;
9943 ptr0 = ptr = RSTRING_PTR(str);
9947 OnigPosition
len = onig_match(reg_grapheme_cluster,
9948 (
const OnigUChar *)ptr, (
const OnigUChar *)end,
9949 (
const OnigUChar *)ptr, NULL, 0);
9950 if (
len <= 0)
break;
9955 if (!cached_reg_grapheme_cluster) {
9956 onig_free(reg_grapheme_cluster);
9976rb_str_each_grapheme_cluster(
VALUE str)
9979 return rb_str_enumerate_grapheme_clusters(str, 0);
9991rb_str_grapheme_clusters(
VALUE str)
9994 return rb_str_enumerate_grapheme_clusters(str, ary);
9998chopped_length(
VALUE str)
10001 const char *p, *p2, *beg, *end;
10003 beg = RSTRING_PTR(str);
10004 end = beg + RSTRING_LEN(str);
10005 if (beg >= end)
return 0;
10006 p = rb_enc_prev_char(beg, end, end, enc);
10008 if (p > beg && rb_enc_ascget(p, end, 0, enc) ==
'\n') {
10009 p2 = rb_enc_prev_char(beg, p, end, enc);
10010 if (p2 && rb_enc_ascget(p2, end, 0, enc) ==
'\r') p = p2;
10028rb_str_chop_bang(
VALUE str)
10030 str_modify_keep_cr(str);
10031 if (RSTRING_LEN(str) > 0) {
10033 len = chopped_length(str);
10034 STR_SET_LEN(str,
len);
10035 TERM_FILL(&RSTRING_PTR(str)[
len], TERM_LEN(str));
10054rb_str_chop(
VALUE str)
10060smart_chomp(
VALUE str,
const char *e,
const char *p)
10063 if (rb_enc_mbminlen(enc) > 1) {
10068 pp = e - rb_enc_mbminlen(enc);
10071 if (rb_enc_ascget(pp, e, 0, enc) ==
'\r') {
10079 if (--e > p && *(e-1) ==
'\r') {
10096 char *pp, *e, *rsptr;
10098 char *
const p = RSTRING_PTR(str);
10099 long len = RSTRING_LEN(str);
10101 if (
len == 0)
return 0;
10104 return smart_chomp(str, e, p);
10107 enc = rb_enc_get(str);
10110 if (rb_enc_mbminlen(enc) > 1) {
10115 pp -= rb_enc_mbminlen(enc);
10118 if (rb_enc_ascget(pp, e, 0, enc) ==
'\r') {
10125 while (e > p && *(e-1) ==
'\n') {
10127 if (e > p && *(e-1) ==
'\r')
10133 if (rslen >
len)
return len;
10135 enc = rb_enc_get(rs);
10136 newline = rsptr[rslen-1];
10137 if (rslen == rb_enc_mbminlen(enc)) {
10139 if (newline ==
'\n')
10140 return smart_chomp(str, e, p);
10144 return smart_chomp(str, e, p);
10148 enc = rb_enc_check(str, rs);
10149 if (is_broken_string(rs)) {
10153 if (p[
len-1] == newline &&
10155 memcmp(rsptr, pp, rslen) == 0)) {
10156 if (at_char_boundary(p, pp, e, enc))
10157 return len - rslen;
10169chomp_rs(
int argc,
const VALUE *argv)
10173 VALUE rs = argv[0];
10185 long olen = RSTRING_LEN(str);
10186 long len = chompped_length(str, rs);
10187 if (
len >= olen)
return Qnil;
10188 str_modify_keep_cr(str);
10189 STR_SET_LEN(str,
len);
10190 TERM_FILL(&RSTRING_PTR(str)[
len], TERM_LEN(str));
10210rb_str_chomp_bang(
int argc,
VALUE *argv,
VALUE str)
10213 str_modifiable(str);
10214 if (RSTRING_LEN(str) == 0 && argc < 2)
return Qnil;
10215 rs = chomp_rs(argc, argv);
10217 return rb_str_chomp_string(str, rs);
10230rb_str_chomp(
int argc,
VALUE *argv,
VALUE str)
10232 VALUE rs = chomp_rs(argc, argv);
10238tr_setup_table_multi(
char table[TR_TABLE_SIZE],
VALUE *tablep,
VALUE *ctablep,
10239 VALUE str,
int num_selectors,
VALUE *selectors)
10243 for (i=0; i<num_selectors; i++) {
10244 VALUE selector = selectors[i];
10248 enc = rb_enc_check(str, selector);
10249 tr_setup_table(selector, table, i==0, tablep, ctablep, enc);
10256 const char *
const start = s;
10258 if (!s || s >= e)
return 0;
10261 if (single_byte_optimizable(str)) {
10262 while (s < e && (*s ==
'\0' || ascii_isspace(*s))) s++;
10267 unsigned int cc = rb_enc_codepoint_len(s, e, &n, enc);
10277lstrip_offset_table(
VALUE str,
const char *s,
const char *e,
rb_encoding *enc,
10278 char table[TR_TABLE_SIZE],
VALUE del,
VALUE nodel)
10280 const char *
const start = s;
10282 if (!s || s >= e)
return 0;
10287 unsigned int cc = rb_enc_codepoint_len(s, e, &n, enc);
10289 if (!tr_find(cc, table, del, nodel))
break;
10308rb_str_lstrip_bang(
int argc,
VALUE *argv,
VALUE str)
10312 long olen, loffset;
10314 str_modify_keep_cr(str);
10315 enc = STR_ENC_GET(str);
10318 char table[TR_TABLE_SIZE];
10319 VALUE del = 0, nodel = 0;
10321 tr_setup_table_multi(table, &del, &nodel, str, argc, argv);
10322 loffset = lstrip_offset_table(str, start, start+olen, enc, table, del, nodel);
10325 loffset = lstrip_offset(str, start, start+olen, enc);
10329 long len = olen-loffset;
10330 s = start + loffset;
10331 memmove(start, s,
len);
10332 STR_SET_LEN(str,
len);
10333 TERM_FILL(start+
len, rb_enc_mbminlen(enc));
10368rb_str_lstrip(
int argc,
VALUE *argv,
VALUE str)
10375 char table[TR_TABLE_SIZE];
10376 VALUE del = 0, nodel = 0;
10378 tr_setup_table_multi(table, &del, &nodel, str, argc, argv);
10379 loffset = lstrip_offset_table(str, start, start+
len, STR_ENC_GET(str), table, del, nodel);
10382 loffset = lstrip_offset(str, start, start+
len, STR_ENC_GET(str));
10384 if (loffset <= 0)
return str_duplicate(
rb_cString, str);
10393 rb_str_check_dummy_enc(enc);
10397 if (!s || s >= e)
return 0;
10401 if (single_byte_optimizable(str)) {
10403 while (s < t && ((c = *(t-1)) ==
'\0' || ascii_isspace(c))) t--;
10408 while ((tp = rb_enc_prev_char(s, t, e, enc)) != NULL) {
10418rstrip_offset_table(
VALUE str,
const char *s,
const char *e,
rb_encoding *enc,
10419 char table[TR_TABLE_SIZE],
VALUE del,
VALUE nodel)
10424 rb_str_check_dummy_enc(enc);
10428 if (!s || s >= e)
return 0;
10432 while ((tp = rb_enc_prev_char(s, t, e, enc)) != NULL) {
10434 if (!tr_find(c, table, del, nodel))
break;
10454rb_str_rstrip_bang(
int argc,
VALUE *argv,
VALUE str)
10458 long olen, roffset;
10460 str_modify_keep_cr(str);
10461 enc = STR_ENC_GET(str);
10464 char table[TR_TABLE_SIZE];
10465 VALUE del = 0, nodel = 0;
10467 tr_setup_table_multi(table, &del, &nodel, str, argc, argv);
10468 roffset = rstrip_offset_table(str, start, start+olen, enc, table, del, nodel);
10471 roffset = rstrip_offset(str, start, start+olen, enc);
10474 long len = olen - roffset;
10476 STR_SET_LEN(str,
len);
10477 TERM_FILL(start+
len, rb_enc_mbminlen(enc));
10511rb_str_rstrip(
int argc,
VALUE *argv,
VALUE str)
10515 long olen, roffset;
10517 enc = STR_ENC_GET(str);
10520 char table[TR_TABLE_SIZE];
10521 VALUE del = 0, nodel = 0;
10523 tr_setup_table_multi(table, &del, &nodel, str, argc, argv);
10524 roffset = rstrip_offset_table(str, start, start+olen, enc, table, del, nodel);
10527 roffset = rstrip_offset(str, start, start+olen, enc);
10529 if (roffset <= 0)
return str_duplicate(
rb_cString, str);
10547rb_str_strip_bang(
int argc,
VALUE *argv,
VALUE str)
10550 long olen, loffset, roffset;
10553 str_modify_keep_cr(str);
10554 enc = STR_ENC_GET(str);
10558 char table[TR_TABLE_SIZE];
10559 VALUE del = 0, nodel = 0;
10561 tr_setup_table_multi(table, &del, &nodel, str, argc, argv);
10562 loffset = lstrip_offset_table(str, start, start+olen, enc, table, del, nodel);
10563 roffset = rstrip_offset_table(str, start+loffset, start+olen, enc, table, del, nodel);
10566 loffset = lstrip_offset(str, start, start+olen, enc);
10567 roffset = rstrip_offset(str, start+loffset, start+olen, enc);
10570 if (loffset > 0 || roffset > 0) {
10571 long len = olen-roffset;
10574 memmove(start, start + loffset,
len);
10576 STR_SET_LEN(str,
len);
10577 TERM_FILL(start+
len, rb_enc_mbminlen(enc));
10612rb_str_strip(
int argc,
VALUE *argv,
VALUE str)
10615 long olen, loffset, roffset;
10621 char table[TR_TABLE_SIZE];
10622 VALUE del = 0, nodel = 0;
10624 tr_setup_table_multi(table, &del, &nodel, str, argc, argv);
10625 loffset = lstrip_offset_table(str, start, start+olen, enc, table, del, nodel);
10626 roffset = rstrip_offset_table(str, start+loffset, start+olen, enc, table, del, nodel);
10629 loffset = lstrip_offset(str, start, start+olen, enc);
10630 roffset = rstrip_offset(str, start+loffset, start+olen, enc);
10633 if (loffset <= 0 && roffset <= 0)
return str_duplicate(
rb_cString, str);
10638scan_once(
VALUE str,
VALUE pat,
long *start,
int set_backref_str)
10641 long end, pos = rb_pat_search(pat, str, *start, set_backref_str);
10645 end = pos + RSTRING_LEN(pat);
10649 pos = RMATCH_BEG(match, 0);
10650 end = RMATCH_END(match, 0);
10658 if (RSTRING_LEN(str) > end)
10659 *start = end + rb_enc_fast_mbclen(RSTRING_PTR(str) + end,
10668 if (
NIL_P(match) || RMATCH_NREGS(match) == 1) {
10673 int num_regs = RMATCH_NREGS(match);
10675 for (
int i = 1; i < num_regs; i++) {
10677 if (RMATCH_BEG(match, i) >= 0) {
10678 s =
rb_str_subseq(str, RMATCH_BEG(match, i), RMATCH_END(match, i) - RMATCH_BEG(match, i));
10706 long last = -1, prev = 0;
10707 char *p = RSTRING_PTR(str);
long len = RSTRING_LEN(str);
10709 pat = get_pat_quoted(pat, 1);
10710 mustnot_broken(str);
10714 while (!
NIL_P(result = scan_once(str, pat, &start, 0))) {
10719 if (last >= 0) rb_pat_search(pat, str, last, 1);
10724 while (!
NIL_P(result = scan_once(str, pat, &start, 1))) {
10728 str_mod_check(str, p,
len);
10730 if (last >= 0) rb_pat_search(pat, str, last, 1);
10782rb_str_hex(
VALUE str)
10784 return rb_str_to_inum(str, 16, FALSE);
10868rb_str_oct(
VALUE str)
10870 return rb_str_to_inum(str, -8, FALSE);
10873#ifndef HAVE_CRYPT_R
10878 rb_nativethread_lock_t lock;
10879} crypt_mutex = {PTHREAD_MUTEX_INITIALIZER};
10948# define CRYPT_END() ALLOCV_END(databuf)
10951 extern char *crypt(
const char *,
const char *);
10952# define CRYPT_END() rb_nativethread_lock_unlock(&crypt_mutex.lock)
10955 const char *s, *saltp;
10958 char salt_8bit_clean[3];
10962 mustnot_wchar(str);
10963 mustnot_wchar(salt);
10965 saltp = RSTRING_PTR(salt);
10966 if (RSTRING_LEN(salt) < 2 || !saltp[0] || !saltp[1]) {
10967 rb_raise(rb_eArgError,
"salt too short (need >=2 bytes)");
10971 if (!
ISASCII((
unsigned char)saltp[0]) || !
ISASCII((
unsigned char)saltp[1])) {
10972 salt_8bit_clean[0] = saltp[0] & 0x7f;
10973 salt_8bit_clean[1] = saltp[1] & 0x7f;
10974 salt_8bit_clean[2] =
'\0';
10975 saltp = salt_8bit_clean;
10980# ifdef HAVE_STRUCT_CRYPT_DATA_INITIALIZED
10981 data->initialized = 0;
10983 res = crypt_r(s, saltp, data);
10986 res = crypt(s, saltp);
11001 size_t res_size = strlen(res)+1;
11002 tmp_buf =
ALLOCA_N(
char, res_size);
11003 memcpy(tmp_buf, res, res_size);
11040 char *ptr, *p, *pend;
11043 unsigned long sum0 = 0;
11048 ptr = p = RSTRING_PTR(str);
11049 len = RSTRING_LEN(str);
11055 str_mod_check(str, ptr,
len);
11058 sum0 += (
unsigned char)*p;
11069 if (bits < (
int)
sizeof(
long)*CHAR_BIT) {
11070 sum0 &= (((
unsigned long)1)<<bits)-1;
11090rb_str_justify(
int argc,
VALUE *argv,
VALUE str,
char jflag)
11094 long width,
len, flen = 1, fclen = 1;
11097 const char *f =
" ";
11098 long n, size, llen, rlen, llen2 = 0, rlen2 = 0;
11100 int singlebyte = 1, cr;
11104 enc = STR_ENC_GET(str);
11105 termlen = rb_enc_mbminlen(enc);
11109 enc = rb_enc_check(str, pad);
11110 f = RSTRING_PTR(pad);
11111 flen = RSTRING_LEN(pad);
11112 fclen = str_strlen(pad, enc);
11113 singlebyte = single_byte_optimizable(pad);
11114 if (flen == 0 || fclen == 0) {
11115 rb_raise(rb_eArgError,
"zero width padding");
11118 len = str_strlen(str, enc);
11119 if (width < 0 || len >= width)
return str_duplicate(
rb_cString, str);
11121 llen = (jflag ==
'l') ? 0 : ((jflag ==
'r') ? n : n/2);
11125 llen2 = str_offset(f, f + flen, llen % fclen, enc, singlebyte);
11126 rlen2 = str_offset(f, f + flen, rlen % fclen, enc, singlebyte);
11128 size = RSTRING_LEN(str);
11129 if ((
len = llen / fclen + rlen / fclen) >= LONG_MAX / flen ||
11130 (
len *= flen) >= LONG_MAX - llen2 - rlen2 ||
11131 (
len += llen2 + rlen2) >= LONG_MAX - size) {
11132 rb_raise(rb_eArgError,
"argument too big");
11136 p = RSTRING_PTR(res);
11138 memset(p, *f, llen);
11142 while (llen >= fclen) {
11148 memcpy(p, f, llen2);
11152 memcpy(p, RSTRING_PTR(str), size);
11155 memset(p, *f, rlen);
11159 while (rlen >= fclen) {
11165 memcpy(p, f, rlen2);
11169 TERM_FILL(p, termlen);
11170 STR_SET_LEN(res, p-RSTRING_PTR(res));
11191rb_str_ljust(
int argc,
VALUE *argv,
VALUE str)
11193 return rb_str_justify(argc, argv, str,
'l');
11205rb_str_rjust(
int argc,
VALUE *argv,
VALUE str)
11207 return rb_str_justify(argc, argv, str,
'r');
11220rb_str_center(
int argc,
VALUE *argv,
VALUE str)
11222 return rb_str_justify(argc, argv, str,
'c');
11238 sep = get_pat_quoted(sep, 0);
11245 pos = RMATCH_BEG(match, 0);
11249 pos = rb_str_index(str, sep, 0);
11250 if (pos < 0)
goto failed;
11255 RSTRING_LEN(str)-pos-RSTRING_LEN(sep)));
11258 return rb_ary_new3(3, str_duplicate(
rb_cString, str), str_new_empty_String(str), str_new_empty_String(str));
11272 long pos = RSTRING_LEN(str);
11274 sep = get_pat_quoted(sep, 0);
11281 pos = RMATCH_BEG(match, 0);
11286 pos = rb_str_rindex(str, sep, pos);
11295 RSTRING_LEN(str)-pos-RSTRING_LEN(sep)));
11297 return rb_ary_new3(3, str_new_empty_String(str), str_new_empty_String(str), str_duplicate(
rb_cString, str));
11309rb_str_start_with(
int argc,
VALUE *argv,
VALUE str)
11313 for (i=0; i<argc; i++) {
11314 VALUE tmp = argv[i];
11316 if (rb_reg_start_with_p(tmp, str))
11320 const char *p, *s, *e;
11325 enc = rb_enc_check(str, tmp);
11326 if ((tlen = RSTRING_LEN(tmp)) == 0)
return Qtrue;
11327 if ((slen = RSTRING_LEN(str)) < tlen)
continue;
11328 p = RSTRING_PTR(str);
11331 if (!at_char_right_boundary(p, s, e, enc))
11333 if (memcmp(p, RSTRING_PTR(tmp), tlen) == 0)
11349rb_str_end_with(
int argc,
VALUE *argv,
VALUE str)
11353 for (i=0; i<argc; i++) {
11354 VALUE tmp = argv[i];
11355 const char *p, *s, *e;
11360 enc = rb_enc_check(str, tmp);
11361 if ((tlen = RSTRING_LEN(tmp)) == 0)
return Qtrue;
11362 if ((slen = RSTRING_LEN(str)) < tlen)
continue;
11363 p = RSTRING_PTR(str);
11366 if (!at_char_boundary(p, s, e, enc))
11368 if (memcmp(s, RSTRING_PTR(tmp), tlen) == 0)
11384deleted_prefix_length(
VALUE str,
VALUE prefix)
11386 const char *strptr, *prefixptr;
11387 long olen, prefixlen;
11392 if (!is_broken_string(prefix) ||
11393 !rb_enc_asciicompat(enc) ||
11394 !rb_enc_asciicompat(rb_enc_get(prefix))) {
11395 enc = rb_enc_check(str, prefix);
11399 prefixlen = RSTRING_LEN(prefix);
11400 if (prefixlen <= 0)
return 0;
11401 olen = RSTRING_LEN(str);
11402 if (olen < prefixlen)
return 0;
11403 strptr = RSTRING_PTR(str);
11404 prefixptr = RSTRING_PTR(prefix);
11405 if (memcmp(strptr, prefixptr, prefixlen) != 0)
return 0;
11406 if (is_broken_string(prefix)) {
11407 if (!is_broken_string(str)) {
11411 const char *strend = strptr + olen;
11412 const char *after_prefix = strptr + prefixlen;
11413 if (!at_char_right_boundary(strptr, after_prefix, strend, enc)) {
11434rb_str_delete_prefix_bang(
VALUE str,
VALUE prefix)
11437 str_modify_keep_cr(str);
11439 prefixlen = deleted_prefix_length(str, prefix);
11440 if (prefixlen <= 0)
return Qnil;
11454rb_str_delete_prefix(
VALUE str,
VALUE prefix)
11458 prefixlen = deleted_prefix_length(str, prefix);
11459 if (prefixlen <= 0)
return str_duplicate(
rb_cString, str);
11461 return rb_str_subseq(str, prefixlen, RSTRING_LEN(str) - prefixlen);
11474deleted_suffix_length(
VALUE str,
VALUE suffix)
11476 const char *strptr, *suffixptr;
11477 long olen, suffixlen;
11481 if (is_broken_string(suffix))
return 0;
11482 enc = rb_enc_check(str, suffix);
11485 suffixlen = RSTRING_LEN(suffix);
11486 if (suffixlen <= 0)
return 0;
11487 olen = RSTRING_LEN(str);
11488 if (olen < suffixlen)
return 0;
11489 strptr = RSTRING_PTR(str);
11490 suffixptr = RSTRING_PTR(suffix);
11491 const char *strend = strptr + olen;
11492 const char *before_suffix = strend - suffixlen;
11493 if (memcmp(before_suffix, suffixptr, suffixlen) != 0)
return 0;
11494 if (!at_char_boundary(strptr, before_suffix, strend, enc))
return 0;
11510rb_str_delete_suffix_bang(
VALUE str,
VALUE suffix)
11512 long olen, suffixlen,
len;
11513 str_modifiable(str);
11515 suffixlen = deleted_suffix_length(str, suffix);
11516 if (suffixlen <= 0)
return Qnil;
11518 olen = RSTRING_LEN(str);
11519 str_modify_keep_cr(str);
11520 len = olen - suffixlen;
11521 STR_SET_LEN(str,
len);
11522 TERM_FILL(&RSTRING_PTR(str)[
len], TERM_LEN(str));
11538rb_str_delete_suffix(
VALUE str,
VALUE suffix)
11542 suffixlen = deleted_suffix_length(str, suffix);
11543 if (suffixlen <= 0)
return str_duplicate(
rb_cString, str);
11545 return rb_str_subseq(str, 0, RSTRING_LEN(str) - suffixlen);
11552 rb_raise(
rb_eTypeError,
"value of %"PRIsVALUE
" must be String", rb_id2str(
id));
11558nil_setter_warning(
ID id)
11560 rb_warn_deprecated(
"non-nil '%"PRIsVALUE
"'", NULL, rb_id2str(
id));
11567 if (!
NIL_P(*var)) {
11568 nil_setter_warning(
id);
11575 val = rb_fs_check(val);
11578 "value of %"PRIsVALUE
" must be String or Regexp",
11582 nil_setter_warning(
id);
11599 str_modifiable(str);
11602 int idx = rb_enc_to_index(encoding);
11609 rb_enc_associate_index(str, idx);
11633 if (STR_EMBED_P(str)) {
11634 str2 = str_alloc_embed(
rb_cString, RSTRING_LEN(str) + TERM_LEN(str));
11639 str_replace_shared_without_enc(str2, str);
11641 if (rb_enc_asciicompat(STR_ENC_GET(str))) {
11671rb_str_valid_encoding_p(
VALUE str)
11673 int cr = rb_enc_str_coderange(str);
11691rb_str_is_ascii_only_p(
VALUE str)
11693 int cr = rb_enc_str_coderange(str);
11701 static const char ellipsis[] =
"...";
11702 const long ellipsislen =
sizeof(ellipsis) - 1;
11704 const long blen = RSTRING_LEN(str);
11705 const char *
const p = RSTRING_PTR(str), *e = p + blen;
11706 VALUE estr, ret = 0;
11709 if (
len * rb_enc_mbminlen(enc) >= blen ||
11713 else if (
len <= ellipsislen ||
11715 if (rb_enc_asciicompat(enc)) {
11717 rb_enc_associate(ret, enc);
11724 else if (ret =
rb_str_subseq(str, 0, e - p), rb_enc_asciicompat(enc)) {
11729 rb_enc_from_encoding(enc), 0,
Qnil);
11740 cr = rb_enc_str_coderange(str);
11742 rb_raise(rb_eArgError,
"replacement must be valid byte sequence '%+"PRIsVALUE
"'", str);
11748 rb_enc_inspect_name(enc), rb_enc_inspect_name(e));
11767 if (enc == STR_ENC_GET(str)) {
11772 return enc_str_scrub(enc, str, repl, cr);
11780 const char *rep, *p, *e, *p1, *sp;
11786 rb_raise(rb_eArgError,
"both of block and replacement given");
11793 if (!
NIL_P(repl)) {
11794 repl = str_compat_and_valid(repl, enc);
11797 if (rb_enc_dummy_p(enc)) {
11800 encidx = rb_enc_to_index(enc);
11802#define DEFAULT_REPLACE_CHAR(str) do { \
11803 RBIMPL_ATTR_NONSTRING() static const char replace[sizeof(str)-1] = str; \
11804 rep = replace; replen = (int)sizeof(replace); \
11807 slen = RSTRING_LEN(str);
11808 p = RSTRING_PTR(str);
11813 if (rb_enc_asciicompat(enc)) {
11819 else if (!
NIL_P(repl)) {
11820 rep = RSTRING_PTR(repl);
11821 replen = RSTRING_LEN(repl);
11824 else if (encidx == rb_utf8_encindex()) {
11825 DEFAULT_REPLACE_CHAR(
"\xEF\xBF\xBD");
11829 DEFAULT_REPLACE_CHAR(
"?");
11834 p = search_nonascii(p, e);
11839 int ret = rb_enc_precise_mbclen(p, e, enc);
11858 if (e - p < clen) clen = e - p;
11865 for (; clen > 1; clen--) {
11866 ret = rb_enc_precise_mbclen(q, q + clen, enc);
11877 repl =
rb_yield(rb_enc_str_new(p, clen, enc));
11878 str_mod_check(str, sp, slen);
11879 repl = str_compat_and_valid(repl, enc);
11886 p = search_nonascii(p, e);
11912 repl =
rb_yield(rb_enc_str_new(p, e-p, enc));
11913 str_mod_check(str, sp, slen);
11914 repl = str_compat_and_valid(repl, enc);
11923 long mbminlen = rb_enc_mbminlen(enc);
11927 else if (!
NIL_P(repl)) {
11928 rep = RSTRING_PTR(repl);
11929 replen = RSTRING_LEN(repl);
11931 else if (encidx == ENCINDEX_UTF_16BE) {
11932 DEFAULT_REPLACE_CHAR(
"\xFF\xFD");
11934 else if (encidx == ENCINDEX_UTF_16LE) {
11935 DEFAULT_REPLACE_CHAR(
"\xFD\xFF");
11937 else if (encidx == ENCINDEX_UTF_32BE) {
11938 DEFAULT_REPLACE_CHAR(
"\x00\x00\xFF\xFD");
11940 else if (encidx == ENCINDEX_UTF_32LE) {
11941 DEFAULT_REPLACE_CHAR(
"\xFD\xFF\x00\x00");
11944 DEFAULT_REPLACE_CHAR(
"?");
11948 int ret = rb_enc_precise_mbclen(p, e, enc);
11961 if (e - p < clen) clen = e - p;
11962 if (clen <= mbminlen * 2) {
11967 for (; clen > mbminlen; clen-=mbminlen) {
11968 ret = rb_enc_precise_mbclen(q, q + clen, enc);
11978 repl =
rb_yield(rb_enc_str_new(p, clen, enc));
11979 str_mod_check(str, sp, slen);
11980 repl = str_compat_and_valid(repl, enc);
12005 repl =
rb_yield(rb_enc_str_new(p, e-p, enc));
12006 str_mod_check(str, sp, slen);
12007 repl = str_compat_and_valid(repl, enc);
12047str_scrub_bang(
int argc,
VALUE *argv,
VALUE str)
12055static ID id_normalize;
12056static ID id_normalized_p;
12057static VALUE mUnicodeNormalize;
12060unicode_normalize_common(
int argc,
VALUE *argv,
VALUE str,
ID id)
12062 static int UnicodeNormalizeRequired = 0;
12065 if (!UnicodeNormalizeRequired) {
12066 rb_require(
"unicode_normalize/normalize.rb");
12067 UnicodeNormalizeRequired = 1;
12071 return rb_funcallv(mUnicodeNormalize,
id, argc+1, argv2);
12082rb_str_unicode_normalize(
int argc,
VALUE *argv,
VALUE str)
12084 return unicode_normalize_common(argc, argv, str, id_normalize);
12098rb_str_unicode_normalize_bang(
int argc,
VALUE *argv,
VALUE str)
12100 return rb_str_replace(str, unicode_normalize_common(argc, argv, str, id_normalize));
12127rb_str_unicode_normalized_p(
int argc,
VALUE *argv,
VALUE str)
12129 return unicode_normalize_common(argc, argv, str, id_normalized_p);
12261#define sym_equal rb_obj_equal
12264sym_printable(
const char *s,
const char *send,
rb_encoding *enc)
12268 int c = rb_enc_precise_mbclen(s, send, enc);
12272 c = rb_enc_mbc_to_codepoint(s, send, enc);
12280rb_str_symname_p(
VALUE sym)
12285 rb_encoding *resenc = rb_default_internal_encoding();
12287 if (resenc == NULL) resenc = rb_default_external_encoding();
12288 enc = STR_ENC_GET(sym);
12289 ptr = RSTRING_PTR(sym);
12290 len = RSTRING_LEN(sym);
12291 if ((resenc != enc && !rb_str_is_ascii_only_p(sym)) ||
len != (
long)strlen(ptr) ||
12299rb_str_quote_unprintable(
VALUE str)
12307 resenc = rb_default_internal_encoding();
12308 if (resenc == NULL) resenc = rb_default_external_encoding();
12309 enc = STR_ENC_GET(str);
12310 ptr = RSTRING_PTR(str);
12311 len = RSTRING_LEN(str);
12312 if ((resenc != enc && !rb_str_is_ascii_only_p(str)) ||
12313 !sym_printable(ptr, ptr +
len, enc)) {
12314 return rb_str_escape(str);
12320rb_id_quote_unprintable(
ID id)
12322 VALUE str = rb_id2str(
id);
12323 if (!rb_str_symname_p(str)) {
12324 return rb_str_escape(str);
12342sym_inspect(
VALUE sym)
12349 if (!rb_str_symname_p(str)) {
12351 len = RSTRING_LEN(str);
12352 rb_str_resize(str,
len + 1);
12353 dest = RSTRING_PTR(str);
12354 memmove(dest + 1, dest,
len);
12358 VALUE orig_str = str;
12360 len = RSTRING_LEN(orig_str);
12361 str = rb_enc_str_new(0,
len + 1, enc);
12364 ptr = RSTRING_PTR(orig_str);
12365 dest = RSTRING_PTR(str);
12366 memcpy(dest + 1, ptr,
len);
12386rb_sym_proc_call(
ID mid,
int argc,
const VALUE *argv,
int kw_splat,
VALUE passed_proc)
12391 rb_raise(rb_eArgError,
"no receiver given");
12494 return rb_str_match(
rb_sym2str(sym), other);
12509sym_match_m(
int argc,
VALUE *argv,
VALUE sym)
12511 return rb_str_match_m(argc, argv,
rb_sym2str(sym));
12524sym_match_m_p(
int argc,
VALUE *argv,
VALUE sym)
12526 return rb_str_match_m_p(argc, argv, sym);
12544 return rb_str_aref_m(argc, argv,
rb_sym2str(sym));
12555sym_length(
VALUE sym)
12569sym_empty(
VALUE sym)
12603sym_downcase(
int argc,
VALUE *argv,
VALUE sym)
12619sym_capitalize(
int argc,
VALUE *argv,
VALUE sym)
12635sym_swapcase(
int argc,
VALUE *argv,
VALUE sym)
12649sym_start_with(
int argc,
VALUE *argv,
VALUE sym)
12651 return rb_str_start_with(argc, argv,
rb_sym2str(sym));
12664sym_end_with(
int argc,
VALUE *argv,
VALUE sym)
12666 return rb_str_end_with(argc, argv,
rb_sym2str(sym));
12678sym_encoding(
VALUE sym)
12684string_for_symbol(
VALUE name)
12689 rb_raise(
rb_eTypeError,
"%+"PRIsVALUE
" is not a symbol nor a string",
12703 name = string_for_symbol(name);
12704 return rb_intern_str(name);
12713 name = string_for_symbol(name);
12737 return rb_fstring(str);
12743 struct RString fake_str = {RBASIC_INIT};
12744 int encidx = ENCINDEX_US_ASCII;
12747 encidx = ENCINDEX_ASCII_8BIT;
12750 VALUE str = setup_fake_str(&fake_str,
ptr,
len, encidx);
12752 return register_fstring(str,
true,
false);
12764 if (enc != NULL && UNLIKELY(rb_enc_autoload_p(enc))) {
12765 rb_enc_autoload(enc);
12768 struct RString fake_str = {RBASIC_INIT};
12769 return register_fstring(rb_setup_fake_str(&fake_str,
ptr,
len, enc),
true,
false);
12775 if (enc != NULL && UNLIKELY(rb_enc_autoload_p(enc))) {
12776 rb_enc_autoload(enc);
12779 struct RString fake_str = {RBASIC_INIT};
12780 VALUE str = register_fstring(rb_setup_fake_str(&fake_str,
ptr,
len, enc),
true,
true);
12791#if USE_YJIT || USE_ZJIT
12793rb_jit_str_concat_codepoint(
VALUE str,
VALUE codepoint)
12798 if (RB_LIKELY(code >= 0 && code < 0xff)) {
12799 rb_str_buf_cat_byte(str, (
char) code);
12809fstring_set_class_i(
VALUE *str,
void *data)
12813 return ST_CONTINUE;
12821 rb_concurrent_set_foreach_with_replace(fstring_table_obj, fstring_set_class_i, NULL);
12988 rb_gc_register_address(&
rb_fs);
#define RUBY_ASSERT_ALWAYS(expr,...)
A variant of RUBY_ASSERT that does not interface with RUBY_DEBUG.
#define RUBY_ASSERT_BUILTIN_TYPE(obj, type)
A variant of RUBY_ASSERT that asserts when either RUBY_DEBUG or built-in type of obj is type.
#define RUBY_ASSERT(...)
Asserts that the given expression is truthy if and only if RUBY_DEBUG is truthy.
@ RUBY_ENC_CODERANGE_7BIT
The object holds 0 to 127 inclusive and nothing else.
static enum ruby_coderange_type RB_ENC_CODERANGE_AND(enum ruby_coderange_type a, enum ruby_coderange_type b)
"Mix" two code ranges into one.
static int rb_isspace(int c)
Our own locale-insensitive version of isspace(3).
#define rb_define_method(klass, mid, func, arity)
Defines klass#mid.
#define rb_define_singleton_method(klass, mid, func, arity)
Defines klass.mid.
static bool rb_enc_is_newline(const char *p, const char *e, rb_encoding *enc)
Queries if the passed pointer points to a newline character.
static bool rb_enc_isprint(OnigCodePoint c, rb_encoding *enc)
Identical to rb_isprint(), except it additionally takes an encoding.
static bool rb_enc_isctype(OnigCodePoint c, OnigCtype t, rb_encoding *enc)
Queries if the passed code point is of passed character type in the passed encoding.
VALUE rb_enc_sprintf(rb_encoding *enc, const char *fmt,...)
Identical to rb_sprintf(), except it additionally takes an encoding.
static VALUE RB_OBJ_FROZEN_RAW(VALUE obj)
This is an implementation detail of RB_OBJ_FROZEN().
static VALUE RB_FL_TEST_RAW(VALUE obj, VALUE flags)
This is an implementation detail of RB_FL_TEST().
void rb_include_module(VALUE klass, VALUE module)
Includes a module to a class.
VALUE rb_define_class(const char *name, VALUE super)
Defines a top-level class.
VALUE rb_define_module(const char *name)
Defines a top-level module.
void rb_define_alias(VALUE klass, const char *name1, const char *name2)
Defines an alias of a method.
void rb_undef_method(VALUE klass, const char *name)
Defines an undef of a method.
int rb_scan_args(int argc, const VALUE *argv, const char *fmt,...)
Retrieves arguments from argc and argv into the given VALUE references according to the format string.
int rb_block_given_p(void)
Determines if the current method is given a block.
int rb_get_kwargs(VALUE keyword_hash, const ID *table, int required, int optional, VALUE *values)
Keyword argument deconstructor.
#define TYPE(_)
Old name of rb_type.
#define ENCODING_SET_INLINED(obj, i)
Old name of RB_ENCODING_SET_INLINED.
#define RB_INTEGER_TYPE_P
Old name of rb_integer_type_p.
#define ENC_CODERANGE_7BIT
Old name of RUBY_ENC_CODERANGE_7BIT.
#define ENC_CODERANGE_VALID
Old name of RUBY_ENC_CODERANGE_VALID.
#define FL_UNSET_RAW
Old name of RB_FL_UNSET_RAW.
#define rb_str_buf_cat2
Old name of rb_str_cat_cstr.
#define ALLOCV
Old name of RB_ALLOCV.
#define ISSPACE
Old name of rb_isspace.
#define T_STRING
Old name of RUBY_T_STRING.
#define ENC_CODERANGE_CLEAN_P(cr)
Old name of RB_ENC_CODERANGE_CLEAN_P.
#define ENC_CODERANGE_AND(a, b)
Old name of RB_ENC_CODERANGE_AND.
#define Qundef
Old name of RUBY_Qundef.
#define INT2FIX
Old name of RB_INT2FIX.
#define OBJ_FROZEN
Old name of RB_OBJ_FROZEN.
#define rb_str_cat2
Old name of rb_str_cat_cstr.
#define UNREACHABLE
Old name of RBIMPL_UNREACHABLE.
#define ID2SYM
Old name of RB_ID2SYM.
#define T_BIGNUM
Old name of RUBY_T_BIGNUM.
#define OBJ_FREEZE
Old name of RB_OBJ_FREEZE.
#define T_FIXNUM
Old name of RUBY_T_FIXNUM.
#define UNREACHABLE_RETURN
Old name of RBIMPL_UNREACHABLE_RETURN.
#define SYM2ID
Old name of RB_SYM2ID.
#define ENC_CODERANGE(obj)
Old name of RB_ENC_CODERANGE.
#define CLASS_OF
Old name of rb_class_of.
#define ENC_CODERANGE_UNKNOWN
Old name of RUBY_ENC_CODERANGE_UNKNOWN.
#define SIZET2NUM
Old name of RB_SIZE2NUM.
#define FIXABLE
Old name of RB_FIXABLE.
#define xmalloc
Old name of ruby_xmalloc.
#define ENCODING_GET(obj)
Old name of RB_ENCODING_GET.
#define LONG2FIX
Old name of RB_INT2FIX.
#define ISDIGIT
Old name of rb_isdigit.
#define ENC_CODERANGE_MASK
Old name of RUBY_ENC_CODERANGE_MASK.
#define ZALLOC_N
Old name of RB_ZALLOC_N.
#define ALLOC_N
Old name of RB_ALLOC_N.
#define MBCLEN_CHARFOUND_LEN(ret)
Old name of ONIGENC_MBCLEN_CHARFOUND_LEN.
#define FL_TEST_RAW
Old name of RB_FL_TEST_RAW.
#define FL_SET
Old name of RB_FL_SET.
#define rb_ary_new3
Old name of rb_ary_new_from_args.
#define ENCODING_INLINE_MAX
Old name of RUBY_ENCODING_INLINE_MAX.
#define LONG2NUM
Old name of RB_LONG2NUM.
#define FL_ANY_RAW
Old name of RB_FL_ANY_RAW.
#define ISALPHA
Old name of rb_isalpha.
#define MBCLEN_INVALID_P(ret)
Old name of ONIGENC_MBCLEN_INVALID_P.
#define ISASCII
Old name of rb_isascii.
#define TOLOWER
Old name of rb_tolower.
#define Qtrue
Old name of RUBY_Qtrue.
#define ST2FIX
Old name of RB_ST2FIX.
#define MBCLEN_NEEDMORE_P(ret)
Old name of ONIGENC_MBCLEN_NEEDMORE_P.
#define FIXNUM_MAX
Old name of RUBY_FIXNUM_MAX.
#define NUM2INT
Old name of RB_NUM2INT.
#define Qnil
Old name of RUBY_Qnil.
#define Qfalse
Old name of RUBY_Qfalse.
#define FIX2LONG
Old name of RB_FIX2LONG.
#define ENC_CODERANGE_BROKEN
Old name of RUBY_ENC_CODERANGE_BROKEN.
#define scan_hex(s, l, e)
Old name of ruby_scan_hex.
#define NIL_P
Old name of RB_NIL_P.
#define ALLOCV_N
Old name of RB_ALLOCV_N.
#define MBCLEN_CHARFOUND_P(ret)
Old name of ONIGENC_MBCLEN_CHARFOUND_P.
#define DBL2NUM
Old name of rb_float_new.
#define ISPRINT
Old name of rb_isprint.
#define BUILTIN_TYPE
Old name of RB_BUILTIN_TYPE.
#define ENCODING_SHIFT
Old name of RUBY_ENCODING_SHIFT.
#define FL_TEST
Old name of RB_FL_TEST.
#define FL_FREEZE
Old name of RUBY_FL_FREEZE.
#define NUM2LONG
Old name of RB_NUM2LONG.
#define ENCODING_GET_INLINED(obj)
Old name of RB_ENCODING_GET_INLINED.
#define ENC_CODERANGE_CLEAR(obj)
Old name of RB_ENC_CODERANGE_CLEAR.
#define FL_UNSET
Old name of RB_FL_UNSET.
#define UINT2NUM
Old name of RB_UINT2NUM.
#define ENCODING_IS_ASCII8BIT(obj)
Old name of RB_ENCODING_IS_ASCII8BIT.
#define FIXNUM_P
Old name of RB_FIXNUM_P.
#define CONST_ID
Old name of RUBY_CONST_ID.
#define rb_ary_new2
Old name of rb_ary_new_capa.
#define ENC_CODERANGE_SET(obj, cr)
Old name of RB_ENC_CODERANGE_SET.
#define ENCODING_CODERANGE_SET(obj, encindex, cr)
Old name of RB_ENCODING_CODERANGE_SET.
#define FL_SET_RAW
Old name of RB_FL_SET_RAW.
#define SYMBOL_P
Old name of RB_SYMBOL_P.
#define OBJ_FROZEN_RAW
Old name of RB_OBJ_FROZEN_RAW.
#define T_REGEXP
Old name of RUBY_T_REGEXP.
#define ENCODING_MASK
Old name of RUBY_ENCODING_MASK.
void rb_category_warn(rb_warning_category_t category, const char *fmt,...)
Identical to rb_category_warning(), except it reports unless $VERBOSE is nil.
void rb_exc_raise(VALUE mesg)
Raises an exception in the current thread.
void rb_syserr_fail(int e, const char *mesg)
Raises appropriate exception that represents a C errno.
VALUE rb_eRangeError
RangeError exception.
VALUE rb_eTypeError
TypeError exception.
VALUE rb_eEncCompatError
Encoding::CompatibilityError exception.
VALUE rb_eRuntimeError
RuntimeError exception.
VALUE rb_eIndexError
IndexError exception.
@ RB_WARN_CATEGORY_DEPRECATED
Warning is for deprecated features.
VALUE rb_cObject
Object class.
VALUE rb_any_to_s(VALUE obj)
Generates a textual representation of the given object.
VALUE rb_obj_alloc(VALUE klass)
Allocates an instance of the given class.
VALUE rb_class_new_instance_pass_kw(int argc, const VALUE *argv, VALUE klass)
Identical to rb_class_new_instance(), except it passes the passed keywords if any to the #initialize ...
VALUE rb_obj_frozen_p(VALUE obj)
Just calls RB_OBJ_FROZEN() inside.
double rb_str_to_dbl(VALUE str, int mode)
Identical to rb_cstr_to_dbl(), except it accepts a Ruby string instead of a C string.
VALUE rb_obj_class(VALUE obj)
Queries the class of an object.
VALUE rb_obj_dup(VALUE obj)
Duplicates the given object.
VALUE rb_cSymbol
Symbol class.
VALUE rb_equal(VALUE lhs, VALUE rhs)
This function is an optimised version of calling #==.
VALUE rb_obj_freeze(VALUE obj)
Just calls rb_obj_freeze_inline() inside.
VALUE rb_mComparable
Comparable module.
VALUE rb_cString
String class.
VALUE rb_to_int(VALUE val)
Identical to rb_check_to_int(), except it raises in case of conversion mismatch.
static char * rb_enc_left_char_head(const char *s, const char *p, const char *e, rb_encoding *enc)
Queries the left boundary of a character.
static char * rb_enc_right_char_head(const char *s, const char *p, const char *e, rb_encoding *enc)
Queries the right boundary of a character.
static unsigned int rb_enc_codepoint(const char *p, const char *e, rb_encoding *enc)
Queries the code point of character pointed by the passed pointer.
static int rb_enc_mbmaxlen(rb_encoding *enc)
Queries the maximum number of bytes that the passed encoding needs to represent a character.
static int RB_ENCODING_GET_INLINED(VALUE obj)
Queries the encoding of the passed object.
static int rb_enc_code_to_mbclen(int c, rb_encoding *enc)
Identical to rb_enc_codelen(), except it returns 0 for invalid code points.
static char * rb_enc_step_back(const char *s, const char *p, const char *e, int n, rb_encoding *enc)
Scans the string backwards for n characters.
VALUE rb_str_conv_enc(VALUE str, rb_encoding *from, rb_encoding *to)
Encoding conversion main routine.
VALUE rb_enc_str_new_static(const char *ptr, long len, rb_encoding *enc)
Identical to rb_enc_str_new(), except it takes a C string literal.
char * rb_enc_nth(const char *head, const char *tail, long nth, rb_encoding *enc)
Queries the n-th character.
VALUE rb_str_conv_enc_opts(VALUE str, rb_encoding *from, rb_encoding *to, int ecflags, VALUE ecopts)
Identical to rb_str_conv_enc(), except it additionally takes IO encoder options.
VALUE rb_enc_interned_str(const char *ptr, long len, rb_encoding *enc)
Identical to rb_enc_str_new(), except it returns a "f"string.
long rb_memsearch(const void *x, long m, const void *y, long n, rb_encoding *enc)
Looks for the passed string in the passed buffer.
long rb_enc_strlen(const char *head, const char *tail, rb_encoding *enc)
Counts the number of characters of the passed string, according to the passed encoding.
VALUE rb_enc_str_buf_cat(VALUE str, const char *ptr, long len, rb_encoding *enc)
Identical to rb_str_cat(), except it additionally takes an encoding.
VALUE rb_enc_str_new_cstr(const char *ptr, rb_encoding *enc)
Identical to rb_enc_str_new(), except it assumes the passed pointer is a pointer to a C string.
VALUE rb_str_export_to_enc(VALUE obj, rb_encoding *enc)
Identical to rb_str_export(), except it additionally takes an encoding.
VALUE rb_external_str_new_with_enc(const char *ptr, long len, rb_encoding *enc)
Identical to rb_external_str_new(), except it additionally takes an encoding.
int rb_enc_str_asciionly_p(VALUE str)
Queries if the passed string is "ASCII only".
VALUE rb_enc_interned_str_cstr(const char *ptr, rb_encoding *enc)
Identical to rb_enc_str_new_cstr(), except it returns a "f"string.
long rb_str_coderange_scan_restartable(const char *str, const char *end, rb_encoding *enc, int *cr)
Scans the passed string until it finds something odd.
int rb_enc_symname2_p(const char *name, long len, rb_encoding *enc)
Identical to rb_enc_symname_p(), except it additionally takes the passed string's length.
rb_econv_result_t rb_econv_convert(rb_econv_t *ec, const unsigned char **source_buffer_ptr, const unsigned char *source_buffer_end, unsigned char **destination_buffer_ptr, unsigned char *destination_buffer_end, int flags)
Converts a string from an encoding to another.
rb_econv_result_t
return value of rb_econv_convert()
@ econv_finished
The conversion stopped after converting everything.
@ econv_destination_buffer_full
The conversion stopped because there is no destination.
rb_econv_t * rb_econv_open_opts(const char *source_encoding, const char *destination_encoding, int ecflags, VALUE ecopts)
Identical to rb_econv_open(), except it additionally takes a hash of optional strings.
VALUE rb_str_encode(VALUE str, VALUE to, int ecflags, VALUE ecopts)
Converts the contents of the passed string from its encoding to the passed one.
void rb_econv_close(rb_econv_t *ec)
Destructs a converter.
VALUE rb_funcall(VALUE recv, ID mid, int n,...)
Calls a method.
VALUE rb_funcall_with_block_kw(VALUE recv, ID mid, int argc, const VALUE *argv, VALUE procval, int kw_splat)
Identical to rb_funcallv_with_block(), except you can specify how to handle the last element of the g...
VALUE rb_check_array_type(VALUE obj)
Try converting an object to its array representation using its to_ary method, if any.
VALUE rb_ary_new(void)
Allocates a new, empty array.
VALUE rb_ary_new_capa(long capa)
Identical to rb_ary_new(), except it additionally specifies how many rooms of objects it should alloc...
VALUE rb_ary_push(VALUE ary, VALUE elem)
Special case of rb_ary_cat() that it adds only one element.
VALUE rb_ary_freeze(VALUE obj)
Freeze an array, preventing further modifications.
#define RETURN_SIZED_ENUMERATOR(obj, argc, argv, size_fn)
This roughly resembles return enum_for(__callee__) unless block_given?.
#define RETURN_ENUMERATOR(obj, argc, argv)
Identical to RETURN_SIZED_ENUMERATOR(), except its size is unknown.
#define UNLIMITED_ARGUMENTS
This macro is used in conjunction with rb_check_arity().
static int rb_check_arity(int argc, int min, int max)
Ensures that the passed integer is in the passed range.
VALUE rb_fs
The field separator character for inputs, or the $;.
VALUE rb_default_rs
This is the default value of rb_rs, i.e.
VALUE rb_backref_get(void)
Queries the last match, or Regexp.last_match, or the $~.
VALUE rb_sym_all_symbols(void)
Collects every single bits of symbols that have ever interned in the entire history of the current pr...
void rb_backref_set(VALUE md)
Updates $~.
VALUE rb_range_beg_len(VALUE range, long *begp, long *lenp, long len, int err)
Deconstructs a numerical range.
int rb_reg_backref_number(VALUE match, VALUE backref)
Queries the index of the given named capture.
int rb_reg_options(VALUE re)
Queries the options of the passed regular expression.
VALUE rb_reg_match(VALUE re, VALUE str)
This is the match operator.
void rb_match_busy(VALUE md)
Asserts that the given MatchData is "occupied".
VALUE rb_reg_nth_match(int n, VALUE md)
Queries the nth captured substring.
void rb_str_free(VALUE str)
Destroys the given string for no reason.
VALUE rb_str_new_shared(VALUE str)
Identical to rb_str_new_cstr(), except it takes a Ruby's string instead of C's.
VALUE rb_str_plus(VALUE lhs, VALUE rhs)
Generates a new string, concatenating the former to the latter.
#define rb_utf8_str_new_cstr(str)
Identical to rb_str_new_cstr, except it generates a string of "UTF-8" encoding.
#define rb_hash_end(h)
Just another name of st_hash_end.
#define rb_hash_uint32(h, i)
Just another name of st_hash_uint32.
VALUE rb_str_append(VALUE dst, VALUE src)
Identical to rb_str_buf_append(), except it converts the right hand side before concatenating.
VALUE rb_filesystem_str_new(const char *ptr, long len)
Identical to rb_str_new(), except it generates a string of "filesystem" encoding.
VALUE rb_sym_to_s(VALUE sym)
This is an rb_sym2str() + rb_str_dup() combo.
VALUE rb_str_times(VALUE str, VALUE num)
Repetition of a string.
VALUE rb_external_str_new(const char *ptr, long len)
Identical to rb_str_new(), except it generates a string of "default external" encoding.
VALUE rb_str_tmp_new(long len)
Allocates a "temporary" string.
long rb_str_offset(VALUE str, long pos)
"Inverse" of rb_str_sublen().
VALUE rb_str_succ(VALUE orig)
Searches for the "successor" of a string.
int rb_str_hash_cmp(VALUE str1, VALUE str2)
Compares two strings.
VALUE rb_str_subseq(VALUE str, long beg, long len)
Identical to rb_str_substr(), except the numbers are interpreted as byte offsets instead of character...
VALUE rb_str_ellipsize(VALUE str, long len)
Shortens str and adds three dots, an ellipsis, if it is longer than len characters.
st_index_t rb_memhash(const void *ptr, long len)
This is a universal hash function.
#define rb_str_new(str, len)
Allocates an instance of rb_cString.
void rb_str_shared_replace(VALUE dst, VALUE src)
Replaces the contents of the former with the latter.
#define rb_str_buf_cat
Just another name of rb_str_cat.
VALUE rb_str_new_static(const char *ptr, long len)
Identical to rb_str_new(), except it takes a C string literal.
#define rb_usascii_str_new(str, len)
Identical to rb_str_new, except it generates a string of "US ASCII" encoding.
size_t rb_str_capacity(VALUE str)
Queries the capacity of the given string.
VALUE rb_str_new_frozen(VALUE str)
Creates a frozen copy of the string, if necessary.
VALUE rb_str_dup(VALUE str)
Duplicates a string.
st_index_t rb_str_hash(VALUE str)
Calculates a hash value of a string.
VALUE rb_str_cat(VALUE dst, const char *src, long srclen)
Destructively appends the passed contents to the string.
VALUE rb_str_locktmp(VALUE str)
Obtains a "temporary lock" of the string.
long rb_str_strlen(VALUE str)
Counts the number of characters (not bytes) that are stored inside of the given string.
VALUE rb_str_resurrect(VALUE str)
Like rb_str_dup(), but always create an instance of rb_cString regardless of the given object's class...
#define rb_str_buf_new_cstr(str)
Identical to rb_str_new_cstr, except done differently.
#define rb_usascii_str_new_cstr(str)
Identical to rb_str_new_cstr, except it generates a string of "US ASCII" encoding.
VALUE rb_str_replace(VALUE dst, VALUE src)
Replaces the contents of the former object with the stringised contents of the latter.
char * rb_str_subpos(VALUE str, long beg, long *len)
Identical to rb_str_substr(), except it returns a C's string instead of Ruby's.
rb_gvar_setter_t rb_str_setter
This is a rb_gvar_setter_t that refutes non-string assignments.
VALUE rb_interned_str_cstr(const char *ptr)
Identical to rb_interned_str(), except it assumes the passed pointer is a pointer to a C's string.
VALUE rb_filesystem_str_new_cstr(const char *ptr)
Identical to rb_filesystem_str_new(), except it assumes the passed pointer is a pointer to a C string...
#define rb_external_str_new_cstr(str)
Identical to rb_str_new_cstr, except it generates a string of "default external" encoding.
VALUE rb_str_buf_append(VALUE dst, VALUE src)
Identical to rb_str_cat_cstr(), except it takes Ruby's string instead of C's.
long rb_str_sublen(VALUE str, long pos)
Byte offset to character offset conversion.
VALUE rb_str_equal(VALUE str1, VALUE str2)
Equality of two strings.
void rb_str_set_len(VALUE str, long len)
Overwrites the length of the string.
VALUE rb_str_inspect(VALUE str)
Generates a "readable" version of the receiver.
void rb_must_asciicompat(VALUE obj)
Asserts that the given string's encoding is (Ruby's definition of) ASCII compatible.
VALUE rb_interned_str(const char *ptr, long len)
Identical to rb_str_new(), except it returns an infamous "f"string.
int rb_str_cmp(VALUE lhs, VALUE rhs)
Compares two strings, as in strcmp(3).
VALUE rb_str_concat(VALUE dst, VALUE src)
Identical to rb_str_append(), except it also accepts an integer as a codepoint.
int rb_str_comparable(VALUE str1, VALUE str2)
Checks if two strings are comparable with each other or not.
#define rb_strlen_lit(str)
Length of a string literal.
VALUE rb_str_buf_cat_ascii(VALUE dst, const char *src)
Identical to rb_str_cat_cstr(), except it additionally assumes the source string be a NUL terminated ...
VALUE rb_str_freeze(VALUE str)
This is the implementation of String#freeze.
void rb_str_update(VALUE dst, long beg, long len, VALUE src)
Replaces some (or all) of the contents of the given string.
VALUE rb_str_scrub(VALUE str, VALUE repl)
"Cleanses" the string.
#define rb_locale_str_new_cstr(str)
Identical to rb_external_str_new_cstr, except it generates a string of "locale" encoding instead of "...
VALUE rb_str_new_with_class(VALUE obj, const char *ptr, long len)
Identical to rb_str_new(), except it takes the class of the allocating object.
#define rb_str_dup_frozen
Just another name of rb_str_new_frozen.
VALUE rb_check_string_type(VALUE obj)
Try converting an object to its stringised representation using its to_str method,...
VALUE rb_str_substr(VALUE str, long beg, long len)
This is the implementation of two-argumented String#slice.
#define rb_str_cat_cstr(buf, str)
Identical to rb_str_cat(), except it assumes the passed pointer is a pointer to a C string.
VALUE rb_str_unlocktmp(VALUE str)
Releases a lock formerly obtained by rb_str_locktmp().
VALUE rb_utf8_str_new_static(const char *ptr, long len)
Identical to rb_str_new_static(), except it generates a string of "UTF-8" encoding instead of "binary...
#define rb_utf8_str_new(str, len)
Identical to rb_str_new, except it generates a string of "UTF-8" encoding.
void rb_str_modify_expand(VALUE str, long capa)
Identical to rb_str_modify(), except it additionally expands the capacity of the receiver.
VALUE rb_str_dump(VALUE str)
"Inverse" of rb_eval_string().
VALUE rb_locale_str_new(const char *ptr, long len)
Identical to rb_str_new(), except it generates a string of "locale" encoding.
VALUE rb_str_buf_new(long capa)
Allocates a "string buffer".
VALUE rb_str_length(VALUE)
Identical to rb_str_strlen(), except it returns the value in rb_cInteger.
#define rb_str_new_cstr(str)
Identical to rb_str_new, except it assumes the passed pointer is a pointer to a C string.
VALUE rb_str_drop_bytes(VALUE str, long len)
Shrinks the given string for the given number of bytes.
VALUE rb_str_split(VALUE str, const char *delim)
Divides the given string based on the given delimiter.
VALUE rb_usascii_str_new_static(const char *ptr, long len)
Identical to rb_str_new_static(), except it generates a string of "US ASCII" encoding instead of "bin...
VALUE rb_str_intern(VALUE str)
Identical to rb_to_symbol(), except it assumes the receiver being an instance of RString.
VALUE rb_obj_as_string(VALUE obj)
Try converting an object to its stringised representation using its to_s method, if any.
VALUE rb_ivar_set(VALUE obj, ID name, VALUE val)
Identical to rb_iv_set(), except it accepts the name as an ID instead of a C string.
VALUE rb_ivar_defined(VALUE obj, ID name)
Queries if the instance variable is defined at the object.
int rb_respond_to(VALUE obj, ID mid)
Queries if the object responds to the method.
void rb_undef_alloc_func(VALUE klass)
Deletes the allocator function of a class.
void rb_define_alloc_func(VALUE klass, rb_alloc_func_t func)
Sets the allocator function of a class.
static ID rb_intern_const(const char *str)
This is a "tiny optimisation" over rb_intern().
VALUE rb_sym2str(VALUE symbol)
Obtain a frozen string representation of a symbol (not including the leading colon).
VALUE rb_to_symbol(VALUE name)
Identical to rb_intern_str(), except it generates a dynamic symbol if necessary.
ID rb_to_id(VALUE str)
Identical to rb_intern_str(), except it tries to convert the parameter object to an instance of rb_cS...
int capa
Designed capacity of the buffer.
int off
Offset inside of ptr.
int len
Length of the buffer.
#define RB_OBJ_SHAREABLE_P(obj)
Queries if the passed object has previously been classified as shareable or not.
long rb_reg_search(VALUE re, VALUE str, long pos, int dir)
Runs the passed regular expression over the passed string.
VALUE rb_reg_regcomp(VALUE str)
Creates a new instance of rb_cRegexp.
VALUE rb_str_format(int argc, const VALUE *argv, VALUE fmt)
Formats a string.
VALUE rb_yield(VALUE val)
Yields the block.
#define MEMCPY(p1, p2, type, n)
Handy macro to call memcpy.
#define ALLOCA_N(type, n)
#define MEMZERO(p, type, n)
Handy macro to erase a region of memory.
#define RB_GC_GUARD(v)
Prevents premature destruction of local objects.
void rb_define_hooked_variable(const char *q, VALUE *w, type *e, void_type *r)
Define a function-backended global variable.
VALUE type(ANYARGS)
ANYARGS-ed function type.
VALUE rb_ensure(type *q, VALUE w, type *e, VALUE r)
An equivalent of the ensure clause.
Defines RBIMPL_ATTR_NONSTRING.
static int RARRAY_LENINT(VALUE ary)
Identical to rb_array_len(), except it differs for the return type.
#define RARRAY_CONST_PTR
Just another name of rb_array_const_ptr.
static VALUE RBASIC_CLASS(VALUE obj)
Queries the class of an object.
#define RBASIC(obj)
Convenient casting macro.
#define DATA_PTR(obj)
Convenient getter macro.
static VALUE RREGEXP_SRC(VALUE rexp)
Convenient getter function.
#define StringValue(v)
Ensures that the parameter object is a String.
VALUE rb_str_export_locale(VALUE obj)
Identical to rb_str_export(), except it converts into the locale encoding instead.
char * rb_string_value_cstr(volatile VALUE *ptr)
Identical to rb_string_value_ptr(), except it additionally checks for the contents for viability as a...
static int RSTRING_LENINT(VALUE str)
Identical to RSTRING_LEN(), except it differs for the return type.
static char * RSTRING_END(VALUE str)
Queries the end of the contents pointer of the string.
#define RSTRING_GETMEM(str, ptrvar, lenvar)
Convenient macro to obtain the contents and length at once.
VALUE rb_string_value(volatile VALUE *ptr)
Identical to rb_str_to_str(), except it fills the passed pointer with the converted object.
#define RSTRING(obj)
Convenient casting macro.
VALUE rb_str_export(VALUE obj)
Identical to rb_str_to_str(), except it additionally converts the string into default external encodi...
char * rb_string_value_ptr(volatile VALUE *ptr)
Identical to rb_str_to_str(), except it returns the converted string's backend memory region.
VALUE rb_str_to_str(VALUE obj)
Identical to rb_check_string_type(), except it raises exceptions in case of conversion failures.
#define StringValueCStr(v)
Identical to StringValuePtr, except it additionally checks for the contents for viability as a C stri...
#define RUBY_TYPED_FREE_IMMEDIATELY
Macros to see if each corresponding flag is defined.
#define TypedData_Wrap_Struct(klass, data_type, sval)
Converts sval, a pointer to your struct, into a Ruby object.
VALUE rb_require(const char *feature)
Identical to rb_require_string(), except it takes C's string instead of Ruby's.
#define errno
Ractor-aware version of errno.
#define RB_NUM2SSIZE
Converts an instance of rb_cInteger into C's ssize_t.
#define RTEST
This is an old name of RB_TEST.
#define _(args)
This was a transition path from K&R to ANSI.
VALUE flags
Per-object flags.
struct RBasic basic
Basic part, including flags and class.
long capa
Capacity of *ptr.
union RString::@58::@59::@61 aux
Auxiliary info.
struct RString::@58::@60 embed
Embedded contents.
long len
Length of the string, not including terminating NUL character.
VALUE shared
Parent of the string.
struct RString::@58::@59 heap
Strings that use separated memory region for contents use this pattern.
char * ptr
Pointer to the contents of the string.
union RString::@58 as
String's specific fields.
This is the struct that holds necessary info for a struct.
void rb_nativethread_lock_lock(rb_nativethread_lock_t *lock)
Blocks until the current thread obtains a lock.
uintptr_t ID
Type that represents a Ruby identifier such as a variable name.
uintptr_t VALUE
Type that represents a Ruby object.
static enum ruby_value_type rb_type(VALUE obj)
Identical to RB_BUILTIN_TYPE(), except it can also accept special constants.
static void Check_Type(VALUE v, enum ruby_value_type t)
Identical to RB_TYPE_P(), except it raises exceptions on predication failure.
static bool RB_TYPE_P(VALUE obj, enum ruby_value_type t)
Queries if the given object is of given type.
ruby_value_type
C-level type of an object.