Merge remote-tracking branch 'origin/upstream'
diff --git a/CHANGELOG.md b/CHANGELOG.md
new file mode 100644
index 0000000..fa1d432
--- /dev/null
+++ b/CHANGELOG.md
@@ -0,0 +1,6 @@
+# 0.3.3 (2024-04-06)
+
+* Impl `Display`, `Eq`, `PartialEq`, `Ord`, `PartialOrd`, and `Hash` for
+ the `Error` type.
+* Switch to the 2021 edition.
+* Add `ucs2_cstr!` macro.
diff --git a/Cargo.toml b/Cargo.toml
new file mode 100644
index 0000000..69b0d7e
--- /dev/null
+++ b/Cargo.toml
@@ -0,0 +1,40 @@
+# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
+#
+# When uploading crates to the registry Cargo will automatically
+# "normalize" Cargo.toml files for maximal compatibility
+# with all versions of Cargo and also rewrite `path` dependencies
+# to registry (e.g., crates.io) dependencies.
+#
+# If you are reading this file be aware that the original Cargo.toml
+# will likely look very different (and much more reasonable).
+# See Cargo.toml.orig for the original contents.
+
+[package]
+edition = "2021"
+rust-version = "1.56"
+name = "ucs2"
+version = "0.3.3"
+authors = [
+ "Gabriel Majeri <[email protected]>",
+ "Fredrik Aleksander",
+ "Isaac Woods",
+]
+description = "UCS-2 decoding and encoding functions"
+readme = "README.md"
+keywords = [
+ "ucs2",
+ "no-std",
+ "encoding",
+]
+categories = [
+ "encoding",
+ "no-std",
+]
+license = "MPL-2.0"
+repository = "https://github.com/rust-osdev/ucs2-rs"
+
+[dependencies.bit_field]
+version = "0.10"
+
+[badges.maintenance]
+status = "passively-maintained"
diff --git a/Cargo.toml.orig b/Cargo.toml.orig
new file mode 100644
index 0000000..ac37922
--- /dev/null
+++ b/Cargo.toml.orig
@@ -0,0 +1,17 @@
+[package]
+name = "ucs2"
+version = "0.3.3"
+authors = ["Gabriel Majeri <[email protected]>", "Fredrik Aleksander", "Isaac Woods"]
+description = "UCS-2 decoding and encoding functions"
+repository = "https://github.com/rust-osdev/ucs2-rs"
+keywords = ["ucs2", "no-std", "encoding"]
+categories = ["encoding", "no-std"]
+license = "MPL-2.0"
+edition = "2021"
+rust-version = "1.56"
+
+[dependencies]
+bit_field = "0.10"
+
+[badges]
+maintenance = { status = "passively-maintained" }
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..a612ad9
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,373 @@
+Mozilla Public License Version 2.0
+==================================
+
+1. Definitions
+--------------
+
+1.1. "Contributor"
+ means each individual or legal entity that creates, contributes to
+ the creation of, or owns Covered Software.
+
+1.2. "Contributor Version"
+ means the combination of the Contributions of others (if any) used
+ by a Contributor and that particular Contributor's Contribution.
+
+1.3. "Contribution"
+ means Covered Software of a particular Contributor.
+
+1.4. "Covered Software"
+ means Source Code Form to which the initial Contributor has attached
+ the notice in Exhibit A, the Executable Form of such Source Code
+ Form, and Modifications of such Source Code Form, in each case
+ including portions thereof.
+
+1.5. "Incompatible With Secondary Licenses"
+ means
+
+ (a) that the initial Contributor has attached the notice described
+ in Exhibit B to the Covered Software; or
+
+ (b) that the Covered Software was made available under the terms of
+ version 1.1 or earlier of the License, but not also under the
+ terms of a Secondary License.
+
+1.6. "Executable Form"
+ means any form of the work other than Source Code Form.
+
+1.7. "Larger Work"
+ means a work that combines Covered Software with other material, in
+ a separate file or files, that is not Covered Software.
+
+1.8. "License"
+ means this document.
+
+1.9. "Licensable"
+ means having the right to grant, to the maximum extent possible,
+ whether at the time of the initial grant or subsequently, any and
+ all of the rights conveyed by this License.
+
+1.10. "Modifications"
+ means any of the following:
+
+ (a) any file in Source Code Form that results from an addition to,
+ deletion from, or modification of the contents of Covered
+ Software; or
+
+ (b) any new file in Source Code Form that contains any Covered
+ Software.
+
+1.11. "Patent Claims" of a Contributor
+ means any patent claim(s), including without limitation, method,
+ process, and apparatus claims, in any patent Licensable by such
+ Contributor that would be infringed, but for the grant of the
+ License, by the making, using, selling, offering for sale, having
+ made, import, or transfer of either its Contributions or its
+ Contributor Version.
+
+1.12. "Secondary License"
+ means either the GNU General Public License, Version 2.0, the GNU
+ Lesser General Public License, Version 2.1, the GNU Affero General
+ Public License, Version 3.0, or any later versions of those
+ licenses.
+
+1.13. "Source Code Form"
+ means the form of the work preferred for making modifications.
+
+1.14. "You" (or "Your")
+ means an individual or a legal entity exercising rights under this
+ License. For legal entities, "You" includes any entity that
+ controls, is controlled by, or is under common control with You. For
+ purposes of this definition, "control" means (a) the power, direct
+ or indirect, to cause the direction or management of such entity,
+ whether by contract or otherwise, or (b) ownership of more than
+ fifty percent (50%) of the outstanding shares or beneficial
+ ownership of such entity.
+
+2. License Grants and Conditions
+--------------------------------
+
+2.1. Grants
+
+Each Contributor hereby grants You a world-wide, royalty-free,
+non-exclusive license:
+
+(a) under intellectual property rights (other than patent or trademark)
+ Licensable by such Contributor to use, reproduce, make available,
+ modify, display, perform, distribute, and otherwise exploit its
+ Contributions, either on an unmodified basis, with Modifications, or
+ as part of a Larger Work; and
+
+(b) under Patent Claims of such Contributor to make, use, sell, offer
+ for sale, have made, import, and otherwise transfer either its
+ Contributions or its Contributor Version.
+
+2.2. Effective Date
+
+The licenses granted in Section 2.1 with respect to any Contribution
+become effective for each Contribution on the date the Contributor first
+distributes such Contribution.
+
+2.3. Limitations on Grant Scope
+
+The licenses granted in this Section 2 are the only rights granted under
+this License. No additional rights or licenses will be implied from the
+distribution or licensing of Covered Software under this License.
+Notwithstanding Section 2.1(b) above, no patent license is granted by a
+Contributor:
+
+(a) for any code that a Contributor has removed from Covered Software;
+ or
+
+(b) for infringements caused by: (i) Your and any other third party's
+ modifications of Covered Software, or (ii) the combination of its
+ Contributions with other software (except as part of its Contributor
+ Version); or
+
+(c) under Patent Claims infringed by Covered Software in the absence of
+ its Contributions.
+
+This License does not grant any rights in the trademarks, service marks,
+or logos of any Contributor (except as may be necessary to comply with
+the notice requirements in Section 3.4).
+
+2.4. Subsequent Licenses
+
+No Contributor makes additional grants as a result of Your choice to
+distribute the Covered Software under a subsequent version of this
+License (see Section 10.2) or under the terms of a Secondary License (if
+permitted under the terms of Section 3.3).
+
+2.5. Representation
+
+Each Contributor represents that the Contributor believes its
+Contributions are its original creation(s) or it has sufficient rights
+to grant the rights to its Contributions conveyed by this License.
+
+2.6. Fair Use
+
+This License is not intended to limit any rights You have under
+applicable copyright doctrines of fair use, fair dealing, or other
+equivalents.
+
+2.7. Conditions
+
+Sections 3.1, 3.2, 3.3, and 3.4 are conditions of the licenses granted
+in Section 2.1.
+
+3. Responsibilities
+-------------------
+
+3.1. Distribution of Source Form
+
+All distribution of Covered Software in Source Code Form, including any
+Modifications that You create or to which You contribute, must be under
+the terms of this License. You must inform recipients that the Source
+Code Form of the Covered Software is governed by the terms of this
+License, and how they can obtain a copy of this License. You may not
+attempt to alter or restrict the recipients' rights in the Source Code
+Form.
+
+3.2. Distribution of Executable Form
+
+If You distribute Covered Software in Executable Form then:
+
+(a) such Covered Software must also be made available in Source Code
+ Form, as described in Section 3.1, and You must inform recipients of
+ the Executable Form how they can obtain a copy of such Source Code
+ Form by reasonable means in a timely manner, at a charge no more
+ than the cost of distribution to the recipient; and
+
+(b) You may distribute such Executable Form under the terms of this
+ License, or sublicense it under different terms, provided that the
+ license for the Executable Form does not attempt to limit or alter
+ the recipients' rights in the Source Code Form under this License.
+
+3.3. Distribution of a Larger Work
+
+You may create and distribute a Larger Work under terms of Your choice,
+provided that You also comply with the requirements of this License for
+the Covered Software. If the Larger Work is a combination of Covered
+Software with a work governed by one or more Secondary Licenses, and the
+Covered Software is not Incompatible With Secondary Licenses, this
+License permits You to additionally distribute such Covered Software
+under the terms of such Secondary License(s), so that the recipient of
+the Larger Work may, at their option, further distribute the Covered
+Software under the terms of either this License or such Secondary
+License(s).
+
+3.4. Notices
+
+You may not remove or alter the substance of any license notices
+(including copyright notices, patent notices, disclaimers of warranty,
+or limitations of liability) contained within the Source Code Form of
+the Covered Software, except that You may alter any license notices to
+the extent required to remedy known factual inaccuracies.
+
+3.5. Application of Additional Terms
+
+You may choose to offer, and to charge a fee for, warranty, support,
+indemnity or liability obligations to one or more recipients of Covered
+Software. However, You may do so only on Your own behalf, and not on
+behalf of any Contributor. You must make it absolutely clear that any
+such warranty, support, indemnity, or liability obligation is offered by
+You alone, and You hereby agree to indemnify every Contributor for any
+liability incurred by such Contributor as a result of warranty, support,
+indemnity or liability terms You offer. You may include additional
+disclaimers of warranty and limitations of liability specific to any
+jurisdiction.
+
+4. Inability to Comply Due to Statute or Regulation
+---------------------------------------------------
+
+If it is impossible for You to comply with any of the terms of this
+License with respect to some or all of the Covered Software due to
+statute, judicial order, or regulation then You must: (a) comply with
+the terms of this License to the maximum extent possible; and (b)
+describe the limitations and the code they affect. Such description must
+be placed in a text file included with all distributions of the Covered
+Software under this License. Except to the extent prohibited by statute
+or regulation, such description must be sufficiently detailed for a
+recipient of ordinary skill to be able to understand it.
+
+5. Termination
+--------------
+
+5.1. The rights granted under this License will terminate automatically
+if You fail to comply with any of its terms. However, if You become
+compliant, then the rights granted under this License from a particular
+Contributor are reinstated (a) provisionally, unless and until such
+Contributor explicitly and finally terminates Your grants, and (b) on an
+ongoing basis, if such Contributor fails to notify You of the
+non-compliance by some reasonable means prior to 60 days after You have
+come back into compliance. Moreover, Your grants from a particular
+Contributor are reinstated on an ongoing basis if such Contributor
+notifies You of the non-compliance by some reasonable means, this is the
+first time You have received notice of non-compliance with this License
+from such Contributor, and You become compliant prior to 30 days after
+Your receipt of the notice.
+
+5.2. If You initiate litigation against any entity by asserting a patent
+infringement claim (excluding declaratory judgment actions,
+counter-claims, and cross-claims) alleging that a Contributor Version
+directly or indirectly infringes any patent, then the rights granted to
+You by any and all Contributors for the Covered Software under Section
+2.1 of this License shall terminate.
+
+5.3. In the event of termination under Sections 5.1 or 5.2 above, all
+end user license agreements (excluding distributors and resellers) which
+have been validly granted by You or Your distributors under this License
+prior to termination shall survive termination.
+
+************************************************************************
+* *
+* 6. Disclaimer of Warranty *
+* ------------------------- *
+* *
+* Covered Software is provided under this License on an "as is" *
+* basis, without warranty of any kind, either expressed, implied, or *
+* statutory, including, without limitation, warranties that the *
+* Covered Software is free of defects, merchantable, fit for a *
+* particular purpose or non-infringing. The entire risk as to the *
+* quality and performance of the Covered Software is with You. *
+* Should any Covered Software prove defective in any respect, You *
+* (not any Contributor) assume the cost of any necessary servicing, *
+* repair, or correction. This disclaimer of warranty constitutes an *
+* essential part of this License. No use of any Covered Software is *
+* authorized under this License except under this disclaimer. *
+* *
+************************************************************************
+
+************************************************************************
+* *
+* 7. Limitation of Liability *
+* -------------------------- *
+* *
+* Under no circumstances and under no legal theory, whether tort *
+* (including negligence), contract, or otherwise, shall any *
+* Contributor, or anyone who distributes Covered Software as *
+* permitted above, be liable to You for any direct, indirect, *
+* special, incidental, or consequential damages of any character *
+* including, without limitation, damages for lost profits, loss of *
+* goodwill, work stoppage, computer failure or malfunction, or any *
+* and all other commercial damages or losses, even if such party *
+* shall have been informed of the possibility of such damages. This *
+* limitation of liability shall not apply to liability for death or *
+* personal injury resulting from such party's negligence to the *
+* extent applicable law prohibits such limitation. Some *
+* jurisdictions do not allow the exclusion or limitation of *
+* incidental or consequential damages, so this exclusion and *
+* limitation may not apply to You. *
+* *
+************************************************************************
+
+8. Litigation
+-------------
+
+Any litigation relating to this License may be brought only in the
+courts of a jurisdiction where the defendant maintains its principal
+place of business and such litigation shall be governed by laws of that
+jurisdiction, without reference to its conflict-of-law provisions.
+Nothing in this Section shall prevent a party's ability to bring
+cross-claims or counter-claims.
+
+9. Miscellaneous
+----------------
+
+This License represents the complete agreement concerning the subject
+matter hereof. If any provision of this License is held to be
+unenforceable, such provision shall be reformed only to the extent
+necessary to make it enforceable. Any law or regulation which provides
+that the language of a contract shall be construed against the drafter
+shall not be used to construe this License against a Contributor.
+
+10. Versions of the License
+---------------------------
+
+10.1. New Versions
+
+Mozilla Foundation is the license steward. Except as provided in Section
+10.3, no one other than the license steward has the right to modify or
+publish new versions of this License. Each version will be given a
+distinguishing version number.
+
+10.2. Effect of New Versions
+
+You may distribute the Covered Software under the terms of the version
+of the License under which You originally received the Covered Software,
+or under the terms of any subsequent version published by the license
+steward.
+
+10.3. Modified Versions
+
+If you create software not governed by this License, and you want to
+create a new license for such software, you may create and use a
+modified version of this License if you rename the license and remove
+any references to the name of the license steward (except to note that
+such modified license differs from this License).
+
+10.4. Distributing Source Code Form that is Incompatible With Secondary
+Licenses
+
+If You choose to distribute Source Code Form that is Incompatible With
+Secondary Licenses under the terms of this version of the License, the
+notice described in Exhibit B of this License must be attached.
+
+Exhibit A - Source Code Form License Notice
+-------------------------------------------
+
+ This Source Code Form is subject to the terms of the Mozilla Public
+ License, v. 2.0. If a copy of the MPL was not distributed with this
+ file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+If it is not possible or desirable to put the notice in a particular
+file, then You may include the notice in a location (such as a LICENSE
+file in a relevant directory) where a recipient would be likely to look
+for such a notice.
+
+You may add additional accurate notices of copyright ownership.
+
+Exhibit B - "Incompatible With Secondary Licenses" Notice
+---------------------------------------------------------
+
+ This Source Code Form is "Incompatible With Secondary Licenses", as
+ defined by the Mozilla Public License, v. 2.0.
diff --git a/METADATA b/METADATA
new file mode 100644
index 0000000..dcd7e62
--- /dev/null
+++ b/METADATA
@@ -0,0 +1,20 @@
+name: "ucs2"
+description: "UCS-2 decoding and encoding functions"
+third_party {
+ identifier {
+ type: "crates.io"
+ value: "ucs2"
+ }
+ identifier {
+ type: "Archive"
+ value: "https://static.crates.io/crates/ucs2/ucs2-0.3.3.crate"
+ primary_source: true
+ }
+ version: "0.3.3"
+ license_type: RECIPROCAL
+ last_upgrade_date {
+ year: 2024
+ month: 11
+ day: 4
+ }
+}
diff --git a/MODULE_LICENSE_MPL b/MODULE_LICENSE_MPL
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/MODULE_LICENSE_MPL
diff --git a/OWNERS b/OWNERS
new file mode 100644
index 0000000..5a2b844
--- /dev/null
+++ b/OWNERS
@@ -0,0 +1 @@
+include platform/prebuilts/rust:main:/OWNERS
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..9b24f53
--- /dev/null
+++ b/README.md
@@ -0,0 +1,21 @@
+# ucs2-rs
+
+[crates.io](https://crates.io/crates/ucs2)
+[docs.rs](https://docs.rs/ucs2)
+
+
+UCS-2 handling for Rust.
+
+Note that UCS-2 is the predecessor of [UTF-16](https://en.wikipedia.org/wiki/UTF-16).
+It is a **fixed-length** encoding, and it is used for things like [UEFI](http://www.uefi.org/).
+
+## History
+
+This crate arose out of the needs of the [`uefi-rs`](https://github.com/GabrielMajeri/uefi-rs) crate.
+The code was extracted and placed here for easier maintenance and easier reuse.
+
+Most of the initial code has been contributed by [FredrikAleksander](https://github.com/FredrikAleksander).
+
+## License
+
+Licensed under the Mozilla Public License 2.0. See the [LICENSE](LICENSE) file for the full text.
diff --git a/src/lib.rs b/src/lib.rs
new file mode 100644
index 0000000..6d3aef1
--- /dev/null
+++ b/src/lib.rs
@@ -0,0 +1,227 @@
+//! Utility functions for the UCS-2 character encoding.
+
+#![no_std]
+#![deny(missing_docs)]
+#![deny(clippy::all)]
+
+mod macros;
+
+/// These need to be public for the `ucs2_cstr!` macro, but are not
+/// intended to be called directly.
+#[doc(hidden)]
+pub use macros::{str_num_ucs2_chars, str_to_ucs2};
+
+use bit_field::BitField;
+use core::fmt::{self, Display, Formatter};
+
+/// Errors returned by the encoding and decoding functions of this crate.
+#[derive(Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)]
+pub enum Error {
+ /// The output buffer is too small to hold the complete result.
+ BufferOverflow,
+ /// Input contained a character (four bytes in UTF-8) which cannot be represented in UCS-2.
+ MultiByte,
+}
+
+impl Display for Error {
+    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
+        // Pick the static message for the variant, then write it once.
+        let message = match self {
+            Self::BufferOverflow => "output buffer is too small",
+            Self::MultiByte => "input contains a character which cannot be represented in UCS-2",
+        };
+        f.write_str(message)
+    }
+}
+
+type Result<T> = core::result::Result<T, Error>;
+
+/// A decoded character and its UTF-8 length, as returned by `ucs2_from_utf8_at_offset`.
+struct Ucs2CharFromUtf8 {
+ /// UCS-2 character.
+ val: u16,
+ /// Number of bytes (1 to 3) that encode the character in the UTF-8 input.
+ num_bytes: u8,
+}
+
+/// Get a UCS-2 character from a UTF-8 byte slice at the given offset.
+///
+/// # Safety
+///
+/// `bytes` must be valid UTF-8 and `offset` must index the first byte of a character in it.
+const unsafe fn ucs2_from_utf8_at_offset(bytes: &[u8], offset: usize) -> Result<Ucs2CharFromUtf8> {
+ let len = bytes.len();
+ let ch;
+ let ch_len;
+ // The high bits of the leading byte determine the length of the UTF-8 sequence.
+ if bytes[offset] & 0b1000_0000 == 0b0000_0000 {
+ ch = bytes[offset] as u16;
+ ch_len = 1;
+ } else if bytes[offset] & 0b1110_0000 == 0b1100_0000 {
+ // 2 byte codepoint
+ if offset + 1 >= len {
+ // SAFETY: the caller guarantees that `bytes` is valid
+ // UTF-8, so the leading byte of a two-byte sequence
+ // is always followed by its continuation byte and
+ // `offset + 1` is in bounds.
+ unsafe { core::hint::unreachable_unchecked() }
+ }
+
+ let a = (bytes[offset] & 0b0001_1111) as u16;
+ let b = (bytes[offset + 1] & 0b0011_1111) as u16;
+ ch = a << 6 | b;
+ ch_len = 2;
+ } else if bytes[offset] & 0b1111_0000 == 0b1110_0000 {
+ // 3 byte codepoint
+ if offset + 2 >= len || offset + 1 >= len {
+ // SAFETY: valid UTF-8 cannot end in the middle of a three-byte sequence.
+ unsafe { core::hint::unreachable_unchecked() }
+ }
+
+ let a = (bytes[offset] & 0b0000_1111) as u16;
+ let b = (bytes[offset + 1] & 0b0011_1111) as u16;
+ let c = (bytes[offset + 2] & 0b0011_1111) as u16;
+ ch = a << 12 | b << 6 | c;
+ ch_len = 3;
+ } else if bytes[offset] & 0b1111_0000 == 0b1111_0000 {
+ return Err(Error::MultiByte); // four-byte sequence: outside the range of UCS-2
+ } else {
+ // SAFETY: only a continuation byte gets here, which never starts a character in valid UTF-8.
+ unsafe { core::hint::unreachable_unchecked() }
+ }
+
+ Ok(Ucs2CharFromUtf8 {
+ val: ch,
+ num_bytes: ch_len,
+ })
+}
+
+/// Encodes an input UTF-8 string into a UCS-2 string stored in `buffer`.
+///
+/// On success, returns the number of UCS-2 characters written to `buffer`.
+/// Fails with `Error::BufferOverflow` once a character no longer fits, or with
+/// the error from `encode_with` if `input` cannot be encoded.
+pub fn encode(input: &str, buffer: &mut [u16]) -> Result<usize> {
+    let mut written = 0;
+
+    encode_with(input, |unit| match buffer.get_mut(written) {
+        // `None` means `written` has reached the end of `buffer`.
+        None => Err(Error::BufferOverflow),
+        Some(slot) => {
+            *slot = unit;
+            written += 1;
+            Ok(())
+        }
+    })?;
+
+    Ok(written)
+}
+
+/// Convert a UTF-8 string to UCS-2, passing each character to a callback.
+///
+/// `output` is invoked once per encoded character, in input order; the first
+/// error from decoding or from `output` stops the conversion and is returned.
+pub fn encode_with<F>(input: &str, mut output: F) -> Result<()>
+where
+    F: FnMut(u16) -> Result<()>,
+{
+    let utf8 = input.as_bytes();
+    let mut offset = 0;
+
+    while offset < utf8.len() {
+        // SAFETY: `utf8` comes from a `&str`, and `offset` only advances by whole characters.
+        let decoded = unsafe { ucs2_from_utf8_at_offset(utf8, offset) }?;
+        offset += usize::from(decoded.num_bytes);
+        output(decoded.val)?;
+    }
+    Ok(())
+}
+
+/// Decode UCS-2 string to UTF-8 with a custom callback function.
+///
+/// `output` is called once per input character with that character's UTF-8
+/// encoding, which is one to three bytes long. On success, returns the total
+/// number of UTF-8 bytes passed to `output`.
+pub fn decode_with<F>(input: &[u16], mut output: F) -> Result<usize>
+where
+    F: FnMut(&[u8]) -> Result<()>,
+{
+    let mut total = 0;
+
+    for &unit in input {
+        // UCS-2 only covers the Basic Multilingual Plane, so the value range
+        // alone decides whether one, two or three UTF-8 bytes are needed.
+        total += match unit {
+            0x0000..=0x007F => {
+                // Single byte: the value is its own UTF-8 encoding.
+                output(&[unit as u8])?;
+                1
+            }
+            0x0080..=0x07FF => {
+                // Two bytes: 110xxxxx 10xxxxxx.
+                let lead = 0b1100_0000 + unit.get_bits(6..11) as u8;
+                let tail = 0b1000_0000 + unit.get_bits(0..6) as u8;
+                output(&[lead, tail])?;
+                2
+            }
+            _ => {
+                // Three bytes: 1110xxxx 10xxxxxx 10xxxxxx.
+                let lead = 0b1110_0000 + unit.get_bits(12..16) as u8;
+                let mid = 0b1000_0000 + unit.get_bits(6..12) as u8;
+                let tail = 0b1000_0000 + unit.get_bits(0..6) as u8;
+                output(&[lead, mid, tail])?;
+                3
+            }
+        };
+    }
+
+    Ok(total)
+}
+
+/// Decode an input UCS-2 string into a UTF-8 string.
+///
+/// On success, returns the number of bytes written to `output`. Fails with
+/// `Error::BufferOverflow` if `output` is too small; since each UCS-2 character
+/// needs up to three UTF-8 bytes, `3 * input.len()` bytes are always enough.
+pub fn decode(input: &[u16], output: &mut [u8]) -> Result<usize> {
+ let buffer_size = output.len();
+ let mut i = 0;
+ // Append each character's UTF-8 bytes at `i`, checking first that all of them fit.
+ decode_with(input, |bytes| {
+ if bytes.len() == 1 {
+ // Can be encoded in a single byte
+ if i >= buffer_size {
+ return Err(Error::BufferOverflow);
+ }
+
+ output[i] = bytes[0];
+
+ i += 1;
+ } else if bytes.len() == 2 {
+ // Can be encoded in two bytes
+ if i + 1 >= buffer_size {
+ return Err(Error::BufferOverflow);
+ }
+
+ output[i] = bytes[0];
+ output[i + 1] = bytes[1];
+
+ i += 2;
+ } else if bytes.len() == 3 {
+ // Can be encoded in three bytes
+ if i + 2 >= buffer_size {
+ return Err(Error::BufferOverflow);
+ }
+
+ output[i] = bytes[0];
+ output[i + 1] = bytes[1];
+ output[i + 2] = bytes[2];
+
+ i += 3;
+ } else {
+ unreachable!("More than three bytes per UCS-2 character.");
+ }
+
+ Ok(())
+ })
+}
diff --git a/src/macros.rs b/src/macros.rs
new file mode 100644
index 0000000..b30f8bc
--- /dev/null
+++ b/src/macros.rs
@@ -0,0 +1,126 @@
+use crate::{ucs2_from_utf8_at_offset, Error};
+
+/// Count the number of UCS-2 characters in a string.
+///
+/// Returns the decoder's error if the string contains a character that cannot
+/// be encoded in UCS-2.
+pub const fn str_num_ucs2_chars(s: &str) -> Result<usize, Error> {
+    let utf8 = s.as_bytes();
+
+    let mut pos = 0;
+    let mut count = 0;
+
+    // Iterators and `?` cannot be used in a `const fn` here, hence the manual
+    // loop and the explicit `match` on the decoder's result.
+    while pos < utf8.len() {
+        // SAFETY: `utf8` comes from a `&str`, and `pos` only advances by whole characters.
+        let ch = match unsafe { ucs2_from_utf8_at_offset(utf8, pos) } {
+            Ok(ch) => ch,
+            Err(err) => return Err(err),
+        };
+        pos += ch.num_bytes as usize;
+        count += 1;
+    }
+
+    Ok(count)
+}
+
+/// Convert a `str` into a null-terminated UCS-2 character array.
+///
+/// # Panics
+///
+/// Panics on an interior null character, or if `N` is not the number of
+/// UCS-2 characters in `s` plus one (for the trailing null character).
+pub const fn str_to_ucs2<const N: usize>(s: &str) -> Result<[u16; N], Error> {
+    let bytes = s.as_bytes();
+    let mut output = [0; N];
+    let mut output_offset = 0;
+    let mut input_offset = 0;
+    while input_offset < bytes.len() {
+        // SAFETY: `bytes` is valid UTF-8.
+        let ch = match unsafe { ucs2_from_utf8_at_offset(bytes, input_offset) } {
+            Ok(ch) => ch,
+            Err(err) => return Err(err),
+        };
+        if ch.val == 0 {
+            panic!("interior null character");
+        }
+        // Fail with the intended message, not an out-of-bounds index panic.
+        if output_offset >= N {
+            panic!("incorrect array length");
+        }
+        output[output_offset] = ch.val;
+        output_offset += 1;
+        input_offset += ch.num_bytes as usize;
+    }
+
+    // Exactly one element must remain for the trailing null character.
+    if output_offset + 1 != N {
+        panic!("incorrect array length");
+    }
+    Ok(output)
+}
+
+/// Encode a string as UCS-2 with a trailing null character.
+///
+/// The encoding is done at compile time, so the result can be used in a `const` item, and a
+/// string that cannot be encoded in UCS-2 is rejected with a compile error. The macro returns
+/// a `[u16; N]` array; to avoid having to specify what `N` is in a `const` item, take a
+/// reference and store it as `&[u16]`.
+///
+/// # Example
+///
+/// ```
+/// use ucs2::ucs2_cstr;
+///
+/// const S: &[u16] = &ucs2_cstr!("abc");
+/// assert_eq!(S, [97, 98, 99, 0]);
+/// ```
+#[macro_export]
+macro_rules! ucs2_cstr {
+ ($s:literal) => {{
+ // Use `const` items here so that both conversions are evaluated at
+ // compile time and any failure becomes a compile error.
+
+ const NUM_CHARS: usize = match $crate::str_num_ucs2_chars($s) {
+ // Add one for the null char.
+ Ok(num) => num + 1,
+ Err(_) => panic!("input contains a character which cannot be represented in UCS-2"),
+ };
+
+ const VAL: [u16; NUM_CHARS] = match $crate::str_to_ucs2($s) {
+ Ok(val) => val,
+ // The string was already checked by `str_num_ucs2_chars`,
+ // so this error is unreachable.
+ Err(_) => {
+ unreachable!();
+ }
+ };
+ VAL
+ }};
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ #[test]
+ fn test_str_num_chars() {
+ // Some of the strings here are from https://www.kermitproject.org/utf8.html.
+ // The expected values count characters (including spaces), not UTF-8 bytes.
+ // One-byte chars.
+ assert_eq!(str_num_ucs2_chars("abc"), Ok(3));
+ // Two-byte chars.
+ assert_eq!(str_num_ucs2_chars("Τη γλώσσα μου έδωσαν ελληνική"), Ok(29));
+ // Three-byte chars.
+ assert_eq!(str_num_ucs2_chars("ვეპხის ტყაოსანი შოთა რუსთაველი"), Ok(30));
+ // Four-byte chars cannot be represented in UCS-2 and must be rejected.
+ assert_eq!(str_num_ucs2_chars("😎🔥"), Err(Error::MultiByte));
+ }
+
+ #[test]
+ fn test_ucs2_cstr() {
+ let s = ucs2_cstr!("abc");
+ assert_eq!(s, [97, 98, 99, 0]);
+ }
+}
diff --git a/tests/tests.rs b/tests/tests.rs
new file mode 100644
index 0000000..f2ab0f4
--- /dev/null
+++ b/tests/tests.rs
@@ -0,0 +1,75 @@
+use ucs2::{decode, decode_with, encode, ucs2_cstr, Error};
+
+#[test]
+fn encoding() {
+ let input = "őэ╋";
+ let mut buffer = [0u16; 3];
+ // A buffer of exactly the right size receives all three characters.
+ assert_eq!(encode(input, &mut buffer), Ok(3));
+ assert_eq!(buffer[..], [0x0151, 0x044D, 0x254B]);
+ // A buffer that is one element too short must be reported as an overflow.
+ let mut buffer = [0u16; 2];
+ assert_eq!(encode(input, &mut buffer), Err(Error::BufferOverflow));
+ // A four-byte UTF-8 character cannot be represented in UCS-2.
+ let input = "😎";
+ assert_eq!(encode(input, &mut buffer), Err(Error::MultiByte));
+}
+
+#[test]
+fn decoding() {
+ let input = "$¢ह한";
+ let mut u16_buffer = [0u16; 4];
+ assert_eq!(encode(input, &mut u16_buffer), Ok(4));
+ // The input needs 1 + 2 + 3 + 3 = 9 bytes of UTF-8, so this buffer is exactly filled.
+ let mut u8_buffer = [0u8; 9];
+ assert_eq!(decode(&u16_buffer, &mut u8_buffer), Ok(9));
+ assert_eq!(core::str::from_utf8(&u8_buffer[0..9]), Ok("$¢ह한"));
+
+ // `decode` has three branches that can return `BufferOverflow`: buffers of 0, 1 and
+ // 3 bytes overflow on the one-, two- and three-byte character respectively.
+ assert_eq!(
+ decode(&u16_buffer, &mut u8_buffer[..0]),
+ Err(Error::BufferOverflow)
+ );
+ assert_eq!(
+ decode(&u16_buffer, &mut u8_buffer[..1]),
+ Err(Error::BufferOverflow)
+ );
+ assert_eq!(
+ decode(&u16_buffer, &mut u8_buffer[..3]),
+ Err(Error::BufferOverflow)
+ );
+}
+
+#[test]
+fn decoding_with() {
+    let input = "$¢ह한";
+
+    // Encode first so that there is a UCS-2 string to decode.
+    let mut u16_buffer = [0u16; 4];
+    assert_eq!(encode(input, &mut u16_buffer).unwrap(), 4);
+
+    let mut u8_buffer = [0u8; 9];
+    let mut pos = 0;
+    // The callback appends every chunk of UTF-8 bytes to `u8_buffer`.
+    let result = decode_with(&u16_buffer, |bytes| {
+        for &byte in bytes {
+            u8_buffer[pos] = byte;
+            pos += 1;
+        }
+        Ok(())
+    });
+
+    // Nine bytes in total: 1 + 2 + 3 + 3.
+    assert_eq!(result.unwrap(), 9);
+    assert_eq!(core::str::from_utf8(&u8_buffer[0..9]), Ok("$¢ह한"));
+}
+
+#[test]
+fn test_macro() {
+ const S1: [u16; 4] = ucs2_cstr!("abc");
+ const S2: &[u16] = &ucs2_cstr!("$¢ह한");
+ // Both results end with the trailing null character added by the macro.
+ assert_eq!(S1, [97, 98, 99, 0]);
+ assert_eq!(S2, [36, 162, 2361, 54620, 0]);
+}