java_string/
char.rs

1use std::char::ParseCharError;
2use std::cmp::Ordering;
3use std::fmt;
4use std::fmt::{Debug, Display, Formatter, Write};
5use std::hash::{Hash, Hasher};
6use std::iter::{once, FusedIterator, Once};
7use std::ops::Range;
8use std::str::FromStr;
9
10use crate::validations::{TAG_CONT, TAG_FOUR_B, TAG_THREE_B, TAG_TWO_B};
11
/// A Unicode code point in the range `0..=0x10FFFF`. Unlike `char`, surrogate
/// code points are valid values of this type.
//
// Layout note: JavaCodePoint is guaranteed to have the same repr as a u32, with
// valid values between 0 and 0x10FFFF, the same as a Unicode code point.
// Surrogate code points are valid values of this type.
#[derive(Copy, Clone, PartialEq, Eq)]
#[repr(C)]
pub struct JavaCodePoint {
    // The two halves are declared in memory order for the target's endianness
    // so that the struct can be transmuted to and from a `u32`.
    #[cfg(target_endian = "little")]
    lower: u16,
    // Upper 16 bits of the code point; restricted to the seventeen values
    // 0..=16, which keeps the whole value at or below 0x10FFFF.
    upper: SeventeenValues,
    #[cfg(target_endian = "big")]
    lower: u16,
}
24
/// The seventeen possible values (0 through 16) of the upper sixteen bits of a
/// code point. Stored as a `u16` so it can occupy half of a `u32`.
#[repr(u16)]
#[derive(Copy, Clone, PartialEq, Eq)]
// The variants are never named directly; values of this type are produced by
// transmuting whole code points.
#[allow(unused)]
enum SeventeenValues {
    V0 = 0,
    V1 = 1,
    V2 = 2,
    V3 = 3,
    V4 = 4,
    V5 = 5,
    V6 = 6,
    V7 = 7,
    V8 = 8,
    V9 = 9,
    V10 = 10,
    V11 = 11,
    V12 = 12,
    V13 = 13,
    V14 = 14,
    V15 = 15,
    V16 = 16,
}
47
48impl JavaCodePoint {
    /// The highest code point, built from [`char::MAX`].
    pub const MAX: JavaCodePoint = JavaCodePoint::from_char(char::MAX);
    /// The replacement character, built from [`char::REPLACEMENT_CHARACTER`].
    pub const REPLACEMENT_CHARACTER: JavaCodePoint =
        JavaCodePoint::from_char(char::REPLACEMENT_CHARACTER);
52
53    /// See [`char::from_u32`]
54    ///
55    /// ```
56    /// # use java_string::JavaCodePoint;
57    /// let c = JavaCodePoint::from_u32(0x2764);
58    /// assert_eq!(Some(JavaCodePoint::from_char('❤')), c);
59    ///
60    /// assert_eq!(None, JavaCodePoint::from_u32(0x110000));
61    /// ```
62    #[inline]
63    #[must_use]
64    pub const fn from_u32(i: u32) -> Option<JavaCodePoint> {
65        if i <= 0x10ffff {
66            unsafe { Some(Self::from_u32_unchecked(i)) }
67        } else {
68            None
69        }
70    }
71
72    /// # Safety
73    /// The argument must be within the valid Unicode code point range of 0 to
74    /// 0x10FFFF inclusive. Surrogate code points are allowed.
75    #[inline]
76    #[must_use]
77    pub const unsafe fn from_u32_unchecked(i: u32) -> JavaCodePoint {
78        // SAFETY: the caller checks that the argument can be represented by this type
79        std::mem::transmute(i)
80    }
81
82    /// Converts a `char` to a code point.
83    #[inline]
84    #[must_use]
85    pub const fn from_char(char: char) -> JavaCodePoint {
86        unsafe {
87            // SAFETY: all chars are valid code points
88            JavaCodePoint::from_u32_unchecked(char as u32)
89        }
90    }
91
92    /// Converts this code point to a `u32`.
93    ///
94    /// ```
95    /// # use java_string::JavaCodePoint;
96    /// assert_eq!(65, JavaCodePoint::from_char('A').as_u32());
97    /// assert_eq!(0xd800, JavaCodePoint::from_u32(0xd800).unwrap().as_u32());
98    /// ```
99    #[inline]
100    #[must_use]
101    pub const fn as_u32(self) -> u32 {
102        unsafe {
103            // SAFETY: JavaCodePoint has the same repr as a u32
104            let result = std::mem::transmute::<Self, u32>(self);
105
106            if result > 0x10ffff {
107                // SAFETY: JavaCodePoint can never have a value > 0x10FFFF.
108                // This statement may allow the optimizer to remove branches in the calling code
109                // associated with out of bounds chars.
110                std::hint::unreachable_unchecked();
111            }
112
113            result
114        }
115    }
116
117    /// Converts this code point to a `char`.
118    ///
119    /// ```
120    /// # use java_string::JavaCodePoint;
121    /// assert_eq!(Some('a'), JavaCodePoint::from_char('a').as_char());
122    /// assert_eq!(None, JavaCodePoint::from_u32(0xd800).unwrap().as_char());
123    /// ```
124    #[inline]
125    #[must_use]
126    pub const fn as_char(self) -> Option<char> {
127        char::from_u32(self.as_u32())
128    }
129
130    /// # Safety
131    /// The caller must ensure that this code point is not a surrogate code
132    /// point.
133    #[inline]
134    #[must_use]
135    pub unsafe fn as_char_unchecked(self) -> char {
136        char::from_u32_unchecked(self.as_u32())
137    }
138
139    /// See [`char::encode_utf16`]
140    ///
141    /// ```
142    /// # use java_string::JavaCodePoint;
143    /// assert_eq!(
144    ///     2,
145    ///     JavaCodePoint::from_char('𝕊')
146    ///         .encode_utf16(&mut [0; 2])
147    ///         .len()
148    /// );
149    /// assert_eq!(
150    ///     1,
151    ///     JavaCodePoint::from_u32(0xd800)
152    ///         .unwrap()
153    ///         .encode_utf16(&mut [0; 2])
154    ///         .len()
155    /// );
156    /// ```
157    /// ```should_panic
158    /// # use java_string::JavaCodePoint;
159    /// // Should panic
160    /// JavaCodePoint::from_char('𝕊').encode_utf16(&mut [0; 1]);
161    /// ```
162    #[inline]
163    pub fn encode_utf16(self, dst: &mut [u16]) -> &mut [u16] {
164        if let Some(char) = self.as_char() {
165            char.encode_utf16(dst)
166        } else {
167            dst[0] = self.as_u32() as u16;
168            &mut dst[..1]
169        }
170    }
171
172    /// Encodes this `JavaCodePoint` into semi UTF-8, that is, UTF-8 with
173    /// surrogate code points. See also [`char::encode_utf8`].
174    ///
175    /// ```
176    /// # use java_string::JavaCodePoint;
177    /// assert_eq!(
178    ///     2,
179    ///     JavaCodePoint::from_char('ß')
180    ///         .encode_semi_utf8(&mut [0; 4])
181    ///         .len()
182    /// );
183    /// assert_eq!(
184    ///     3,
185    ///     JavaCodePoint::from_u32(0xd800)
186    ///         .unwrap()
187    ///         .encode_semi_utf8(&mut [0; 4])
188    ///         .len()
189    /// );
190    /// ```
191    /// ```should_panic
192    /// # use java_string::JavaCodePoint;
193    /// // Should panic
194    /// JavaCodePoint::from_char('ß').encode_semi_utf8(&mut [0; 1]);
195    /// ```
196    #[inline]
197    pub fn encode_semi_utf8(self, dst: &mut [u8]) -> &mut [u8] {
198        let len = self.len_utf8();
199        let code = self.as_u32();
200        match (len, &mut dst[..]) {
201            (1, [a, ..]) => {
202                *a = code as u8;
203            }
204            (2, [a, b, ..]) => {
205                *a = ((code >> 6) & 0x1f) as u8 | TAG_TWO_B;
206                *b = (code & 0x3f) as u8 | TAG_CONT;
207            }
208            (3, [a, b, c, ..]) => {
209                *a = ((code >> 12) & 0x0f) as u8 | TAG_THREE_B;
210                *b = ((code >> 6) & 0x3f) as u8 | TAG_CONT;
211                *c = (code & 0x3f) as u8 | TAG_CONT;
212            }
213            (4, [a, b, c, d, ..]) => {
214                *a = ((code >> 18) & 0x07) as u8 | TAG_FOUR_B;
215                *b = ((code >> 12) & 0x3f) as u8 | TAG_CONT;
216                *c = ((code >> 6) & 0x3f) as u8 | TAG_CONT;
217                *d = (code & 0x3f) as u8 | TAG_CONT;
218            }
219            _ => panic!(
220                "encode_utf8: need {} bytes to encode U+{:X}, but the buffer has {}",
221                len,
222                code,
223                dst.len()
224            ),
225        }
226        &mut dst[..len]
227    }
228
229    /// See [`char::eq_ignore_ascii_case`].
230    #[inline]
231    pub fn eq_ignore_ascii_case(&self, other: &JavaCodePoint) -> bool {
232        match (self.as_char(), other.as_char()) {
233            (Some(char1), Some(char2)) => char1.eq_ignore_ascii_case(&char2),
234            (None, None) => self == other,
235            _ => false,
236        }
237    }
238
239    /// See [`char::escape_debug`].
240    ///
241    /// ```
242    /// # use java_string::JavaCodePoint;
243    /// assert_eq!(
244    ///     "a",
245    ///     JavaCodePoint::from_char('a').escape_debug().to_string()
246    /// );
247    /// assert_eq!(
248    ///     "\\n",
249    ///     JavaCodePoint::from_char('\n').escape_debug().to_string()
250    /// );
251    /// assert_eq!(
252    ///     "\\u{d800}",
253    ///     JavaCodePoint::from_u32(0xd800)
254    ///         .unwrap()
255    ///         .escape_debug()
256    ///         .to_string()
257    /// );
258    /// ```
259    #[inline]
260    #[must_use]
261    pub fn escape_debug(self) -> CharEscapeIter {
262        self.escape_debug_ext(EscapeDebugExtArgs::ESCAPE_ALL)
263    }
264
265    #[inline]
266    #[must_use]
267    pub(crate) fn escape_debug_ext(self, args: EscapeDebugExtArgs) -> CharEscapeIter {
268        const NULL: u32 = '\0' as u32;
269        const TAB: u32 = '\t' as u32;
270        const CARRIAGE_RETURN: u32 = '\r' as u32;
271        const LINE_FEED: u32 = '\n' as u32;
272        const SINGLE_QUOTE: u32 = '\'' as u32;
273        const DOUBLE_QUOTE: u32 = '"' as u32;
274        const BACKSLASH: u32 = '\\' as u32;
275
276        unsafe {
277            // SAFETY: all characters specified are in ascii range
278            match self.as_u32() {
279                NULL => CharEscapeIter::new([b'\\', b'0']),
280                TAB => CharEscapeIter::new([b'\\', b't']),
281                CARRIAGE_RETURN => CharEscapeIter::new([b'\\', b'r']),
282                LINE_FEED => CharEscapeIter::new([b'\\', b'n']),
283                SINGLE_QUOTE if args.escape_single_quote => CharEscapeIter::new([b'\\', b'\'']),
284                DOUBLE_QUOTE if args.escape_double_quote => CharEscapeIter::new([b'\\', b'"']),
285                BACKSLASH => CharEscapeIter::new([b'\\', b'\\']),
286                _ if self.is_printable() => {
287                    // SAFETY: surrogate code points are not printable
288                    CharEscapeIter::printable(self.as_char_unchecked())
289                }
290                _ => self.escape_unicode(),
291            }
292        }
293    }
294
295    #[inline]
296    fn is_printable(self) -> bool {
297        let Some(char) = self.as_char() else {
298            return false;
299        };
300        if matches!(char, '\\' | '\'' | '"') {
301            return true;
302        }
303        char.escape_debug().next() != Some('\\')
304    }
305
306    /// See [`char::escape_default`].
307    ///
308    /// ```
309    /// # use java_string::JavaCodePoint;
310    /// assert_eq!(
311    ///     "a",
312    ///     JavaCodePoint::from_char('a').escape_default().to_string()
313    /// );
314    /// assert_eq!(
315    ///     "\\n",
316    ///     JavaCodePoint::from_char('\n').escape_default().to_string()
317    /// );
318    /// assert_eq!(
319    ///     "\\u{d800}",
320    ///     JavaCodePoint::from_u32(0xd800)
321    ///         .unwrap()
322    ///         .escape_default()
323    ///         .to_string()
324    /// );
325    /// ```
326    #[inline]
327    #[must_use]
328    pub fn escape_default(self) -> CharEscapeIter {
329        const TAB: u32 = '\t' as u32;
330        const CARRIAGE_RETURN: u32 = '\r' as u32;
331        const LINE_FEED: u32 = '\n' as u32;
332        const SINGLE_QUOTE: u32 = '\'' as u32;
333        const DOUBLE_QUOTE: u32 = '"' as u32;
334        const BACKSLASH: u32 = '\\' as u32;
335
336        unsafe {
337            // SAFETY: all characters specified are in ascii range
338            match self.as_u32() {
339                TAB => CharEscapeIter::new([b'\\', b't']),
340                CARRIAGE_RETURN => CharEscapeIter::new([b'\\', b'r']),
341                LINE_FEED => CharEscapeIter::new([b'\\', b'n']),
342                SINGLE_QUOTE => CharEscapeIter::new([b'\\', b'\'']),
343                DOUBLE_QUOTE => CharEscapeIter::new([b'\\', b'"']),
344                BACKSLASH => CharEscapeIter::new([b'\\', b'\\']),
345                0x20..=0x7e => CharEscapeIter::new([self.as_u32() as u8]),
346                _ => self.escape_unicode(),
347            }
348        }
349    }
350
351    /// See [`char::escape_unicode`].
352    ///
353    /// ```
354    /// # use java_string::JavaCodePoint;
355    /// assert_eq!(
356    ///     "\\u{2764}",
357    ///     JavaCodePoint::from_char('❤').escape_unicode().to_string()
358    /// );
359    /// assert_eq!(
360    ///     "\\u{d800}",
361    ///     JavaCodePoint::from_u32(0xd800)
362    ///         .unwrap()
363    ///         .escape_unicode()
364    ///         .to_string()
365    /// );
366    /// ```
367    #[inline]
368    #[must_use]
369    pub fn escape_unicode(self) -> CharEscapeIter {
370        let x = self.as_u32();
371
372        let mut arr = [0; 10];
373        arr[0] = b'\\';
374        arr[1] = b'u';
375        arr[2] = b'{';
376
377        let number_len = if x == 0 {
378            1
379        } else {
380            ((x.ilog2() >> 2) + 1) as usize
381        };
382        arr[3 + number_len] = b'}';
383        for hexit in 0..number_len {
384            arr[2 + number_len - hexit] = b"0123456789abcdef"[((x >> (hexit << 2)) & 15) as usize];
385        }
386
387        CharEscapeIter {
388            inner: EscapeIterInner::Escaped(EscapeIterEscaped {
389                bytes: arr,
390                range: 0..number_len + 4,
391            }),
392        }
393    }
394
395    /// See [`char::is_alphabetic`].
396    #[inline]
397    #[must_use]
398    pub fn is_alphabetic(self) -> bool {
399        self.as_char().is_some_and(|char| char.is_alphabetic())
400    }
401
402    /// See [`char::is_alphanumeric`].
403    #[inline]
404    #[must_use]
405    pub fn is_alphanumeric(self) -> bool {
406        self.as_char().is_some_and(|char| char.is_alphanumeric())
407    }
408
409    /// See [`char::is_ascii`].
410    #[inline]
411    #[must_use]
412    pub fn is_ascii(self) -> bool {
413        self.as_u32() <= 0x7f
414    }
415
416    /// See [`char::is_ascii_alphabetic`].
417    #[inline]
418    #[must_use]
419    pub const fn is_ascii_alphabetic(self) -> bool {
420        self.is_ascii_lowercase() || self.is_ascii_uppercase()
421    }
422
423    /// See [`char::is_ascii_alphanumeric`].
424    #[inline]
425    #[must_use]
426    pub const fn is_ascii_alphanumeric(self) -> bool {
427        self.is_ascii_alphabetic() || self.is_ascii_digit()
428    }
429
430    /// See [`char::is_ascii_control`].
431    #[inline]
432    #[must_use]
433    pub const fn is_ascii_control(self) -> bool {
434        matches!(self.as_u32(), 0..=0x1f | 0x7f)
435    }
436
437    /// See [`char::is_ascii_digit`].
438    #[inline]
439    #[must_use]
440    pub const fn is_ascii_digit(self) -> bool {
441        const ZERO: u32 = '0' as u32;
442        const NINE: u32 = '9' as u32;
443        matches!(self.as_u32(), ZERO..=NINE)
444    }
445
446    /// See [`char::is_ascii_graphic`].
447    #[inline]
448    #[must_use]
449    pub const fn is_ascii_graphic(self) -> bool {
450        matches!(self.as_u32(), 0x21..=0x7e)
451    }
452
453    /// See [`char::is_ascii_hexdigit`].
454    #[inline]
455    #[must_use]
456    pub const fn is_ascii_hexdigit(self) -> bool {
457        const LOWER_A: u32 = 'a' as u32;
458        const LOWER_F: u32 = 'f' as u32;
459        const UPPER_A: u32 = 'A' as u32;
460        const UPPER_F: u32 = 'F' as u32;
461        self.is_ascii_digit() || matches!(self.as_u32(), (LOWER_A..=LOWER_F) | (UPPER_A..=UPPER_F))
462    }
463
464    /// See [`char::is_ascii_lowercase`].
465    #[inline]
466    #[must_use]
467    pub const fn is_ascii_lowercase(self) -> bool {
468        const A: u32 = 'a' as u32;
469        const Z: u32 = 'z' as u32;
470        matches!(self.as_u32(), A..=Z)
471    }
472
473    /// See [`char::is_ascii_octdigit`].
474    #[inline]
475    #[must_use]
476    pub const fn is_ascii_octdigit(self) -> bool {
477        const ZERO: u32 = '0' as u32;
478        const SEVEN: u32 = '7' as u32;
479        matches!(self.as_u32(), ZERO..=SEVEN)
480    }
481
482    /// See [`char::is_ascii_punctuation`].
483    #[inline]
484    #[must_use]
485    pub const fn is_ascii_punctuation(self) -> bool {
486        matches!(
487            self.as_u32(),
488            (0x21..=0x2f) | (0x3a..=0x40) | (0x5b..=0x60) | (0x7b..=0x7e)
489        )
490    }
491
492    /// See [`char::is_ascii_uppercase`].
493    #[inline]
494    #[must_use]
495    pub const fn is_ascii_uppercase(self) -> bool {
496        const A: u32 = 'A' as u32;
497        const Z: u32 = 'Z' as u32;
498        matches!(self.as_u32(), A..=Z)
499    }
500
501    /// See [`char::is_ascii_whitespace`].
502    #[inline]
503    #[must_use]
504    pub const fn is_ascii_whitespace(self) -> bool {
505        const SPACE: u32 = ' ' as u32;
506        const HORIZONTAL_TAB: u32 = '\t' as u32;
507        const LINE_FEED: u32 = '\n' as u32;
508        const FORM_FEED: u32 = 0xc;
509        const CARRIAGE_RETURN: u32 = '\r' as u32;
510        matches!(
511            self.as_u32(),
512            SPACE | HORIZONTAL_TAB | LINE_FEED | FORM_FEED | CARRIAGE_RETURN
513        )
514    }
515
516    /// See [`char::is_control`].
517    #[inline]
518    #[must_use]
519    pub fn is_control(self) -> bool {
520        self.as_char().is_some_and(|char| char.is_control())
521    }
522
523    /// See [`char::is_digit`].
524    #[inline]
525    #[must_use]
526    pub fn is_digit(self, radix: u32) -> bool {
527        self.to_digit(radix).is_some()
528    }
529
530    /// See [`char::is_lowercase`].
531    #[inline]
532    #[must_use]
533    pub fn is_lowercase(self) -> bool {
534        self.as_char().is_some_and(|char| char.is_lowercase())
535    }
536
537    /// See [`char::is_numeric`].
538    #[inline]
539    #[must_use]
540    pub fn is_numeric(self) -> bool {
541        self.as_char().is_some_and(|char| char.is_numeric())
542    }
543
544    /// See [`char::is_uppercase`].
545    #[inline]
546    #[must_use]
547    pub fn is_uppercase(self) -> bool {
548        self.as_char().is_some_and(|char| char.is_uppercase())
549    }
550
551    /// See [`char::is_whitespace`].
552    #[inline]
553    #[must_use]
554    pub fn is_whitespace(self) -> bool {
555        self.as_char().is_some_and(|char| char.is_whitespace())
556    }
557
558    /// See [`char::len_utf16`]. Surrogate code points return 1.
559    ///
560    /// ```
561    /// # use java_string::JavaCodePoint;
562    ///
563    /// let n = JavaCodePoint::from_char('ß').len_utf16();
564    /// assert_eq!(n, 1);
565    ///
566    /// let len = JavaCodePoint::from_char('💣').len_utf16();
567    /// assert_eq!(len, 2);
568    ///
569    /// assert_eq!(1, JavaCodePoint::from_u32(0xd800).unwrap().len_utf16());
570    /// ```
571    #[inline]
572    #[must_use]
573    pub const fn len_utf16(self) -> usize {
574        if let Some(char) = self.as_char() {
575            char.len_utf16()
576        } else {
577            1 // invalid code points are encoded as 1 utf16 code point anyway
578        }
579    }
580
581    /// See [`char::len_utf8`]. Surrogate code points return 3.
582    ///
583    /// ```
584    /// # use java_string::JavaCodePoint;
585    ///
586    /// let len = JavaCodePoint::from_char('A').len_utf8();
587    /// assert_eq!(len, 1);
588    ///
589    /// let len = JavaCodePoint::from_char('ß').len_utf8();
590    /// assert_eq!(len, 2);
591    ///
592    /// let len = JavaCodePoint::from_char('ℝ').len_utf8();
593    /// assert_eq!(len, 3);
594    ///
595    /// let len = JavaCodePoint::from_char('💣').len_utf8();
596    /// assert_eq!(len, 4);
597    ///
598    /// let len = JavaCodePoint::from_u32(0xd800).unwrap().len_utf8();
599    /// assert_eq!(len, 3);
600    /// ```
601    #[inline]
602    #[must_use]
603    pub const fn len_utf8(self) -> usize {
604        if let Some(char) = self.as_char() {
605            char.len_utf8()
606        } else {
607            3 // invalid code points are all length 3 in semi-valid utf8
608        }
609    }
610
611    /// See [`char::make_ascii_lowercase`].
612    #[inline]
613    pub fn make_ascii_lowercase(&mut self) {
614        *self = self.to_ascii_lowercase();
615    }
616
617    /// See [`char::make_ascii_uppercase`].
618    #[inline]
619    pub fn make_ascii_uppercase(&mut self) {
620        *self = self.to_ascii_uppercase();
621    }
622
623    /// See [`char::to_ascii_lowercase`].
624    ///
625    /// ```
626    /// # use java_string::JavaCodePoint;
627    ///
628    /// let ascii = JavaCodePoint::from_char('A');
629    /// let non_ascii = JavaCodePoint::from_char('❤');
630    ///
631    /// assert_eq!('a', ascii.to_ascii_lowercase());
632    /// assert_eq!('❤', non_ascii.to_ascii_lowercase());
633    /// ```
634    #[inline]
635    #[must_use]
636    pub const fn to_ascii_lowercase(self) -> JavaCodePoint {
637        if self.is_ascii_uppercase() {
638            unsafe {
639                // SAFETY: all lowercase chars are valid chars
640                Self::from_u32_unchecked(self.as_u32() + 32)
641            }
642        } else {
643            self
644        }
645    }
646
647    /// See [`char::to_ascii_uppercase`].
648    ///
649    /// ```
650    /// # use java_string::JavaCodePoint;
651    ///
652    /// let ascii = JavaCodePoint::from_char('a');
653    /// let non_ascii = JavaCodePoint::from_char('❤');
654    ///
655    /// assert_eq!('A', ascii.to_ascii_uppercase());
656    /// assert_eq!('❤', non_ascii.to_ascii_uppercase());
657    /// ```
658    #[inline]
659    #[must_use]
660    pub const fn to_ascii_uppercase(self) -> JavaCodePoint {
661        if self.is_ascii_lowercase() {
662            unsafe {
663                // SAFETY: all uppercase chars are valid chars
664                Self::from_u32_unchecked(self.as_u32() - 32)
665            }
666        } else {
667            self
668        }
669    }
670
671    /// See [`char::to_digit`].
672    #[inline]
673    #[must_use]
674    pub const fn to_digit(self, radix: u32) -> Option<u32> {
675        if let Some(char) = self.as_char() {
676            char.to_digit(radix)
677        } else {
678            None
679        }
680    }
681
682    /// See [`char::to_lowercase`].
683    #[inline]
684    #[must_use]
685    pub fn to_lowercase(self) -> ToLowercase {
686        match self.as_char() {
687            Some(char) => ToLowercase::char(char.to_lowercase()),
688            None => ToLowercase::invalid(self),
689        }
690    }
691
692    /// See [`char::to_uppercase`].
693    #[inline]
694    #[must_use]
695    pub fn to_uppercase(self) -> ToUppercase {
696        match self.as_char() {
697            Some(char) => ToUppercase::char(char.to_uppercase()),
698            None => ToUppercase::invalid(self),
699        }
700    }
701}
702
703impl Debug for JavaCodePoint {
704    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
705        f.write_char('\'')?;
706        for c in self.escape_debug_ext(EscapeDebugExtArgs {
707            escape_single_quote: true,
708            escape_double_quote: false,
709        }) {
710            f.write_char(c)?;
711        }
712        f.write_char('\'')
713    }
714}
715
716impl Default for JavaCodePoint {
717    #[inline]
718    fn default() -> Self {
719        JavaCodePoint::from_char('\0')
720    }
721}
722
723impl Display for JavaCodePoint {
724    #[inline]
725    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
726        Display::fmt(&self.as_char().unwrap_or(char::REPLACEMENT_CHARACTER), f)
727    }
728}
729
730impl From<JavaCodePoint> for u32 {
731    #[inline]
732    fn from(value: JavaCodePoint) -> Self {
733        value.as_u32()
734    }
735}
736
737impl From<u8> for JavaCodePoint {
738    #[inline]
739    fn from(value: u8) -> Self {
740        JavaCodePoint::from_char(char::from(value))
741    }
742}
743
744impl FromStr for JavaCodePoint {
745    type Err = ParseCharError;
746
747    #[inline]
748    fn from_str(s: &str) -> Result<Self, Self::Err> {
749        char::from_str(s).map(JavaCodePoint::from_char)
750    }
751}
752
753impl Hash for JavaCodePoint {
754    #[inline]
755    fn hash<H: Hasher>(&self, state: &mut H) {
756        self.as_u32().hash(state)
757    }
758}
759
760impl Ord for JavaCodePoint {
761    #[inline]
762    fn cmp(&self, other: &Self) -> Ordering {
763        self.as_u32().cmp(&other.as_u32())
764    }
765}
766
767impl PartialOrd for JavaCodePoint {
768    #[inline]
769    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
770        Some(self.cmp(other))
771    }
772}
773
774impl PartialOrd<char> for JavaCodePoint {
775    #[inline]
776    fn partial_cmp(&self, other: &char) -> Option<Ordering> {
777        self.partial_cmp(&JavaCodePoint::from_char(*other))
778    }
779}
780
781impl PartialOrd<JavaCodePoint> for char {
782    #[inline]
783    fn partial_cmp(&self, other: &JavaCodePoint) -> Option<Ordering> {
784        JavaCodePoint::from_char(*self).partial_cmp(other)
785    }
786}
787
788impl PartialEq<char> for JavaCodePoint {
789    #[inline]
790    fn eq(&self, other: &char) -> bool {
791        self == &JavaCodePoint::from_char(*other)
792    }
793}
794
795impl PartialEq<JavaCodePoint> for char {
796    #[inline]
797    fn eq(&self, other: &JavaCodePoint) -> bool {
798        &JavaCodePoint::from_char(*self) == other
799    }
800}
801
/// Options selecting which quote characters debug escaping puts a backslash
/// in front of.
pub(crate) struct EscapeDebugExtArgs {
    /// Escape `'` as `\'`.
    pub(crate) escape_single_quote: bool,
    /// Escape `"` as `\"`.
    pub(crate) escape_double_quote: bool,
}

impl EscapeDebugExtArgs {
    /// Escapes both kinds of quote.
    pub(crate) const ESCAPE_ALL: EscapeDebugExtArgs = EscapeDebugExtArgs {
        escape_single_quote: true,
        escape_double_quote: true,
    };
}
813
814#[derive(Clone, Debug)]
815pub struct CharEscapeIter {
816    inner: EscapeIterInner,
817}
818
819#[derive(Clone, Debug)]
820enum EscapeIterInner {
821    Printable(Once<char>),
822    Escaped(EscapeIterEscaped),
823}
824
825impl Display for EscapeIterInner {
826    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
827        match self {
828            EscapeIterInner::Printable(char) => char.clone().try_for_each(|ch| f.write_char(ch)),
829            EscapeIterInner::Escaped(escaped) => Display::fmt(escaped, f),
830        }
831    }
832}
833
834impl CharEscapeIter {
835    #[inline]
836    fn printable(char: char) -> Self {
837        CharEscapeIter {
838            inner: EscapeIterInner::Printable(once(char)),
839        }
840    }
841
842    /// # Safety
843    /// Assumes that the input byte array is ASCII
844    #[inline]
845    unsafe fn new<const N: usize>(bytes: [u8; N]) -> Self {
846        assert!(N <= 10, "Too many bytes in escape iter");
847        let mut ten_bytes = [0; 10];
848        ten_bytes[..N].copy_from_slice(&bytes);
849        CharEscapeIter {
850            inner: EscapeIterInner::Escaped(EscapeIterEscaped {
851                bytes: ten_bytes,
852                range: 0..N,
853            }),
854        }
855    }
856}
857
858impl Iterator for CharEscapeIter {
859    type Item = char;
860
861    #[inline]
862    fn next(&mut self) -> Option<Self::Item> {
863        match &mut self.inner {
864            EscapeIterInner::Printable(printable) => printable.next(),
865            EscapeIterInner::Escaped(escaped) => escaped.next(),
866        }
867    }
868
869    #[inline]
870    fn size_hint(&self) -> (usize, Option<usize>) {
871        match &self.inner {
872            EscapeIterInner::Printable(printable) => printable.size_hint(),
873            EscapeIterInner::Escaped(escaped) => escaped.size_hint(),
874        }
875    }
876}
877
878impl ExactSizeIterator for CharEscapeIter {
879    #[inline]
880    fn len(&self) -> usize {
881        match &self.inner {
882            EscapeIterInner::Printable(printable) => printable.len(),
883            EscapeIterInner::Escaped(escaped) => escaped.len(),
884        }
885    }
886}
887
888impl FusedIterator for CharEscapeIter {}
889
890impl Display for CharEscapeIter {
891    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
892        Display::fmt(&self.inner, f)
893    }
894}
895
/// A run of at most ten ASCII bytes together with the window of it that has
/// not been yielded yet.
#[derive(Clone, Debug)]
struct EscapeIterEscaped {
    // SAFETY: all values must be in the ASCII range
    bytes: [u8; 10],
    // SAFETY: range must not be out of bounds for length 10
    range: Range<usize>,
}

impl Iterator for EscapeIterEscaped {
    type Item = char;

    #[inline]
    fn next(&mut self) -> Option<Self::Item> {
        let index = self.range.next()?;
        // SAFETY: the range is never out of bounds for length 10
        let byte = unsafe { *self.bytes.get_unchecked(index) };
        Some(char::from(byte))
    }

    #[inline]
    fn size_hint(&self) -> (usize, Option<usize>) {
        let remaining = self.range.len();
        (remaining, Some(remaining))
    }

    #[inline]
    fn count(self) -> usize {
        // The remaining length is known without iterating.
        self.range.len()
    }
}

impl ExactSizeIterator for EscapeIterEscaped {
    #[inline]
    fn len(&self) -> usize {
        self.range.len()
    }
}

impl FusedIterator for EscapeIterEscaped {}

impl Display for EscapeIterEscaped {
    #[inline]
    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
        let window = self.range.clone();
        // SAFETY: range is in bounds for length 10
        let pending = unsafe { self.bytes.get_unchecked(window) };
        // SAFETY: all bytes are in ASCII range, so they are valid UTF-8
        let text = unsafe { std::str::from_utf8_unchecked(pending) };
        f.write_str(text)
    }
}
945
/// Iterator returned by `JavaCodePoint::to_lowercase`; delegates to
/// [`std::char::ToLowercase`] when the code point has a `char` form.
pub type ToLowercase = CharIterDelegate<std::char::ToLowercase>;
/// Iterator returned by `JavaCodePoint::to_uppercase`; delegates to
/// [`std::char::ToUppercase`] when the code point has a `char` form.
pub type ToUppercase = CharIterDelegate<std::char::ToUppercase>;
948
949#[derive(Debug, Clone)]
950pub struct CharIterDelegate<I>(CharIterDelegateInner<I>);
951
952impl<I> CharIterDelegate<I> {
953    #[inline]
954    fn char(iter: I) -> CharIterDelegate<I> {
955        CharIterDelegate(CharIterDelegateInner::Char(iter))
956    }
957
958    #[inline]
959    fn invalid(code_point: JavaCodePoint) -> CharIterDelegate<I> {
960        CharIterDelegate(CharIterDelegateInner::Invalid(Some(code_point).into_iter()))
961    }
962}
963
964#[derive(Debug, Clone)]
965enum CharIterDelegateInner<I> {
966    Char(I),
967    Invalid(std::option::IntoIter<JavaCodePoint>),
968}
969
970impl<I> Iterator for CharIterDelegate<I>
971where
972    I: Iterator<Item = char>,
973{
974    type Item = JavaCodePoint;
975
976    #[inline]
977    fn next(&mut self) -> Option<Self::Item> {
978        match &mut self.0 {
979            CharIterDelegateInner::Char(char_iter) => {
980                char_iter.next().map(JavaCodePoint::from_char)
981            }
982            CharIterDelegateInner::Invalid(code_point) => code_point.next(),
983        }
984    }
985
986    #[inline]
987    fn size_hint(&self) -> (usize, Option<usize>) {
988        match &self.0 {
989            CharIterDelegateInner::Char(char_iter) => char_iter.size_hint(),
990            CharIterDelegateInner::Invalid(code_point) => code_point.size_hint(),
991        }
992    }
993}
994
995impl<I> DoubleEndedIterator for CharIterDelegate<I>
996where
997    I: Iterator<Item = char> + DoubleEndedIterator,
998{
999    #[inline]
1000    fn next_back(&mut self) -> Option<Self::Item> {
1001        match &mut self.0 {
1002            CharIterDelegateInner::Char(char_iter) => {
1003                char_iter.next_back().map(JavaCodePoint::from_char)
1004            }
1005            CharIterDelegateInner::Invalid(code_point) => code_point.next_back(),
1006        }
1007    }
1008}
1009
1010impl<I> ExactSizeIterator for CharIterDelegate<I> where I: Iterator<Item = char> + ExactSizeIterator {}
1011
1012impl<I> FusedIterator for CharIterDelegate<I> where I: Iterator<Item = char> + FusedIterator {}