Ruby 4.1.0dev (2026-05-15 revision 4ec235e0b227d38426aa477e537ac397963c0ee8)
string.h
1#ifndef INTERNAL_STRING_H /*-*-C-*-vi:se ft=c:*/
2#define INTERNAL_STRING_H
11#include "ruby/internal/config.h"
12#include <stddef.h> /* for size_t */
13#include "internal/compilers.h" /* for __has_builtin */
14#include "ruby/internal/stdbool.h" /* for bool */
15#include "ruby/encoding.h" /* for rb_encoding */
16#include "ruby/ruby.h" /* for VALUE */
17#include "encindex.h"
18
/* Flag bits used on string objects, each built from the generic FL_USER* bits. */
19#define STR_SHARED FL_USER0 /* = ELTS_SHARED */
/* Tested by STR_EMBED_P(): a string counts as embedded when this bit is clear. */
20#define STR_NOEMBED FL_USER1
/* Mask covering both "chilled" reason bits defined below. */
21#define STR_CHILLED (FL_USER2 | FL_USER3)
/* Chilled reason that CHILLED_STRING_MUTATED() reports via rb_warn_unchilled_literal(). */
22#define STR_CHILLED_LITERAL FL_USER2
/* Chilled reason that CHILLED_STRING_MUTATED() reports via rb_warn_unchilled_symbol_to_s(). */
23#define STR_CHILLED_SYMBOL_TO_S FL_USER3
24
/* Re-exposes the chilled mask as an enum constant (presumably for debuggers/tooling -- TODO confirm). */
25enum ruby_rstring_private_flags {
26 RSTRING_CHILLED = STR_CHILLED,
27};
28
/*
 * Drop any macro named rb_fstring_cstr that is already defined, presumably so
 * that the rb_fstring_cstr() prototype further down declares the function
 * itself; this header defines its own rb_fstring_cstr macro near its end.
 */
29#ifdef rb_fstring_cstr
30# undef rb_fstring_cstr
31#endif
32
33static inline bool
34rb_str_encindex_fastpath(int encindex)
35{
36 // The overwhelming majority of strings are in one of these 3 encodings,
37 // which are all either ASCII or perfect ASCII supersets.
38 // Hence you can use fast, single byte algorithms on them, such as `memchr` etc,
39 // without all the overhead of fetching the rb_encoding and using functions such as
40 // rb_enc_mbminlen etc.
41 // Many other encodings could qualify, but they are expected to be rare occurrences,
42 // so it's better to keep that list small.
43 switch (encindex) {
44 case ENCINDEX_ASCII_8BIT:
45 case ENCINDEX_UTF_8:
46 case ENCINDEX_US_ASCII:
47 return true;
48 default:
49 return false;
50 }
51}
52
53static inline bool
54rb_str_enc_fastpath(VALUE str)
55{
56 return rb_str_encindex_fastpath(ENCODING_GET_INLINED(str));
57}
58
59static inline rb_encoding *
60rb_str_enc_get(VALUE str)
61{
63 return rb_enc_from_index(ENCODING_GET(str));
64}
65
66/* string.c */
/*
 * Prototypes only; per the section label above, the definitions are expected
 * to live in string.c.  These sit outside the RUBY_SYMBOL_EXPORT_BEGIN/END
 * bracket used further down in this header.
 */
67VALUE rb_str_dup_m(VALUE str);
68VALUE rb_fstring(VALUE);
69VALUE rb_fstring_cstr(const char *str);
70VALUE rb_fstring_enc_new(const char *ptr, long len, rb_encoding *enc);
71int rb_str_buf_cat_escaped_char(VALUE result, unsigned int c, int unicode_p);
72int rb_str_symname_p(VALUE);
73VALUE rb_str_quote_unprintable(VALUE);
74char *rb_str_fill_terminator(VALUE str, const int termlen);
75void rb_str_change_terminator_length(VALUE str, const int oldtermlen, const int termlen);
76VALUE rb_str_locktmp_ensure(VALUE str, VALUE (*func)(VALUE), VALUE arg);
77VALUE rb_str_chomp_string(VALUE str, VALUE chomp);
78VALUE rb_external_str_with_enc(VALUE str, rb_encoding *eenc);
79VALUE rb_str_cat_conv_enc_opts(VALUE newstr, long ofs, const char *ptr, long len,
80 rb_encoding *from, int ecflags, VALUE ecopts);
81VALUE rb_enc_str_scrub(rb_encoding *enc, VALUE str, VALUE repl);
82VALUE rb_str_escape(VALUE str);
83size_t rb_str_memsize(VALUE);
84char *rb_str_to_cstr(VALUE str);
85const char *ruby_escaped_char(int c);
86void rb_str_make_independent(VALUE str);
87int rb_enc_str_coderange_scan(VALUE str, rb_encoding *enc);
88int rb_ascii8bit_appendable_encoding_index(rb_encoding *enc, unsigned int code);
89VALUE rb_str_include(VALUE str, VALUE arg);
90VALUE rb_str_byte_substr(VALUE str, VALUE beg, VALUE len);
91VALUE rb_str_substr_two_fixnums(VALUE str, VALUE beg, VALUE len, int empty);
92VALUE rb_str_tmp_frozen_no_embed_acquire(VALUE str);
93void rb_str_make_embedded(VALUE);
94VALUE rb_str_upto_each(VALUE, VALUE, int, int (*each)(VALUE, VALUE), VALUE);
95size_t rb_str_size_as_embedded(VALUE);
96bool rb_str_reembeddable_p(VALUE);
97VALUE rb_str_upto_endless_each(VALUE, int (*each)(VALUE, VALUE), VALUE);
98VALUE rb_str_with_debug_created_info(VALUE, VALUE, int);
99VALUE rb_str_frozen_bare_string(VALUE);
100const char *rb_str_null_check(VALUE);
101VALUE rb_str_casecmp(VALUE str1, VALUE str2);
102
103/* error.c */
/* Warning hooks called by CHILLED_STRING_MUTATED() in this header, one per chilled reason. */
104void rb_warn_unchilled_literal(VALUE str);
105void rb_warn_unchilled_symbol_to_s(VALUE str);
106
/* Forward declarations of static inline helpers whose bodies appear later in this header. */
107static inline bool STR_EMBED_P(VALUE str);
108static inline bool STR_SHARED_P(VALUE str);
109static inline VALUE QUOTE(VALUE v);
110static inline VALUE QUOTE_ID(ID v);
111static inline bool is_ascii_string(VALUE str);
112static inline bool is_broken_string(VALUE str);
113static inline VALUE rb_str_eql_internal(const VALUE str1, const VALUE str2);
114
/*
 * Prototypes bracketed by RUBY_SYMBOL_EXPORT_BEGIN/END; the section label
 * marks them as the exported part of string.c (presumably the bracket controls
 * symbol visibility -- confirm against the macro definitions).
 */
115RUBY_SYMBOL_EXPORT_BEGIN
116/* string.c (export) */
117VALUE rb_str_tmp_frozen_acquire(VALUE str);
118void rb_str_tmp_frozen_release(VALUE str, VALUE tmp);
119VALUE rb_setup_fake_str(struct RString *fake_str, const char *name, long len, rb_encoding *enc);
120RUBY_SYMBOL_EXPORT_END
121
122VALUE rb_fstring_new(const char *ptr, long len);
123void rb_gc_free_fstring(VALUE obj);
124bool rb_obj_is_fstring_table(VALUE obj);
125void Init_fstring_table();
126VALUE rb_obj_as_string_result(VALUE str, VALUE obj);
127VALUE rb_str_opt_plus(VALUE x, VALUE y);
128VALUE rb_str_concat_literals(size_t num, const VALUE *strary);
129VALUE rb_str_eql(VALUE str1, VALUE str2);
130VALUE rb_id_quote_unprintable(ID);
131VALUE rb_sym_proc_call(ID mid, int argc, const VALUE *argv, int kw_splat, VALUE passed_proc);
132VALUE rb_enc_literal_str(const char *ptr, long len, rb_encoding *enc);
133
135VALUE rb_ec_str_resurrect(struct rb_execution_context_struct *ec, VALUE str, bool chilled);
136
/*
 * Shorthands that pass +str+ together with rb_strlen_lit(str) as its length,
 * so the call site does not spell the length out.  They rely on
 * rb_strlen_lit() and are therefore presumably meant for string literals only.
 * The *_literal spellings are plain aliases of the *_lit ones.
 */
137#define rb_fstring_lit(str) rb_fstring_new((str), rb_strlen_lit(str))
138#define rb_fstring_literal(str) rb_fstring_lit(str)
139#define rb_fstring_enc_lit(str, enc) rb_fstring_enc_new((str), rb_strlen_lit(str), (enc))
140#define rb_fstring_enc_literal(str, enc) rb_fstring_enc_lit(str, enc)
141
142static inline VALUE
143QUOTE(VALUE v)
144{
145 return rb_str_quote_unprintable(v);
146}
147
148static inline VALUE
149QUOTE_ID(ID i)
150{
151 return rb_id_quote_unprintable(i);
152}
153
154static inline bool
155STR_EMBED_P(VALUE str)
156{
157 return ! FL_TEST_RAW(str, STR_NOEMBED);
158}
159
160static inline bool
161STR_SHARED_P(VALUE str)
162{
163 return FL_ALL_RAW(str, STR_NOEMBED | STR_SHARED);
164}
165
166static inline bool
167CHILLED_STRING_P(VALUE obj)
168{
169 return RB_TYPE_P(obj, T_STRING) && FL_TEST_RAW(obj, STR_CHILLED);
170}
171
172static inline void
173CHILLED_STRING_MUTATED(VALUE str)
174{
175 VALUE chilled_reason = RB_FL_TEST_RAW(str, STR_CHILLED);
176 FL_UNSET_RAW(str, STR_CHILLED);
177 switch (chilled_reason) {
178 case STR_CHILLED_SYMBOL_TO_S:
179 rb_warn_unchilled_symbol_to_s(str);
180 break;
181 case STR_CHILLED_LITERAL:
182 rb_warn_unchilled_literal(str);
183 break;
184 default:
185 rb_bug("RString was chilled for multiple reasons");
186 }
187}
188
189static inline bool
190is_ascii_string(VALUE str)
191{
192 return rb_enc_str_coderange(str) == ENC_CODERANGE_7BIT;
193}
194
195static inline bool
196is_broken_string(VALUE str)
197{
198 return rb_enc_str_coderange(str) == ENC_CODERANGE_BROKEN;
199}
200
201static inline bool
202at_char_boundary(const char *s, const char *p, const char *e, rb_encoding *enc)
203{
204 return rb_enc_left_char_head(s, p, e, enc) == p;
205}
206
207static inline bool
208at_char_right_boundary(const char *s, const char *p, const char *e, rb_encoding *enc)
209{
210 RUBY_ASSERT(s <= p);
211 RUBY_ASSERT(p <= e);
212
213 return rb_enc_right_char_head(s, p, e, enc) == p;
214}
215
216/* expect tail call optimization */
217// YJIT needs this function to never allocate and never raise
218static inline VALUE
219rb_str_eql_internal(const VALUE str1, const VALUE str2)
220{
221 const long len = RSTRING_LEN(str1);
222 const char *ptr1, *ptr2;
223
224 if (len != RSTRING_LEN(str2)) return Qfalse;
225 if (!rb_str_comparable(str1, str2)) return Qfalse;
226 if ((ptr1 = RSTRING_PTR(str1)) == (ptr2 = RSTRING_PTR(str2)))
227 return Qtrue;
228 if (memcmp(ptr1, ptr2, len) == 0)
229 return Qtrue;
230 return Qfalse;
231}
232
/*
 * When the compiler provides __builtin_constant_p, wrap rb_fstring_cstr() in a
 * macro: for a compile-time constant argument it expands to rb_fstring_new()
 * with the length taken from strlen(str); otherwise it calls the real
 * function.  The parenthesized name `(rb_fstring_cstr)` keeps the macro from
 * expanding recursively there.
 */
233#if __has_builtin(__builtin_constant_p)
234# define rb_fstring_cstr(str) \
235 (__builtin_constant_p(str) ? \
236 rb_fstring_new((str), (long)strlen(str)) : \
237 (rb_fstring_cstr)(str))
238#endif
239#endif /* INTERNAL_STRING_H */
#define RUBY_ASSERT(...)
Asserts that the given expression is truthy if and only if RUBY_DEBUG is truthy.
Definition assert.h:219
static VALUE RB_FL_TEST_RAW(VALUE obj, VALUE flags)
This is an implementation detail of RB_FL_TEST().
Definition fl_type.h:404
#define ENC_CODERANGE_7BIT
Old name of RUBY_ENC_CODERANGE_7BIT.
Definition coderange.h:180
#define FL_UNSET_RAW
Old name of RB_FL_UNSET_RAW.
Definition fl_type.h:130
#define T_STRING
Old name of RUBY_T_STRING.
Definition value_type.h:78
#define ENCODING_GET(obj)
Old name of RB_ENCODING_GET.
Definition encoding.h:109
#define FL_TEST_RAW
Old name of RB_FL_TEST_RAW.
Definition fl_type.h:128
#define Qtrue
Old name of RUBY_Qtrue.
#define Qfalse
Old name of RUBY_Qfalse.
#define ENC_CODERANGE_BROKEN
Old name of RUBY_ENC_CODERANGE_BROKEN.
Definition coderange.h:182
#define FL_ALL_RAW
Old name of RB_FL_ALL_RAW.
Definition fl_type.h:120
#define ENCODING_GET_INLINED(obj)
Old name of RB_ENCODING_GET_INLINED.
Definition encoding.h:108
Encoding-related APIs.
static char * rb_enc_left_char_head(const char *s, const char *p, const char *e, rb_encoding *enc)
Queries the left boundary of a character.
Definition encoding.h:683
static char * rb_enc_right_char_head(const char *s, const char *p, const char *e, rb_encoding *enc)
Queries the right boundary of a character.
Definition encoding.h:704
int rb_str_comparable(VALUE str1, VALUE str2)
Checks whether two strings are comparable with each other.
Definition string.c:4228
int len
Length of the buffer.
Definition io.h:8
C99 shim for <stdbool.h>
Ruby's String.
Definition rstring.h:196
uintptr_t ID
Type that represents a Ruby identifier such as a variable name.
Definition value.h:52
uintptr_t VALUE
Type that represents a Ruby object.
Definition value.h:40
static bool RB_TYPE_P(VALUE obj, enum ruby_value_type t)
Queries if the given object is of given type.
Definition value_type.h:376