├── .gitignore ├── .rustfmt.toml ├── rust-toolchain.toml ├── README.md ├── Cargo.toml ├── .github └── workflows │ ├── release.yml │ └── ci.yml ├── LICENSE ├── src ├── data_url.rs ├── encoding.rs └── lib.rs └── Cargo.lock /.gitignore: -------------------------------------------------------------------------------- 1 | /.vscode 2 | /target 3 | -------------------------------------------------------------------------------- /.rustfmt.toml: -------------------------------------------------------------------------------- 1 | max_width = 80 2 | tab_spaces = 2 3 | edition = "2021" 4 | -------------------------------------------------------------------------------- /rust-toolchain.toml: -------------------------------------------------------------------------------- 1 | [toolchain] 2 | channel = "1.85.0" 3 | components = ["clippy", "rustfmt"] 4 | profile = "minimal" 5 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # deno_media_type 2 | 3 | [![](https://img.shields.io/crates/v/deno_media_type.svg)](https://crates.io/crates/deno_media_type) 4 | 5 | Media type used in Deno. 
6 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "deno_media_type" 3 | version = "0.3.3" 4 | edition = "2021" 5 | description = "Media type used in Deno" 6 | homepage = "https://deno.land/" 7 | repository = "https://github.com/denoland/deno_media_type" 8 | documentation = "https://docs.rs/deno_media_type" 9 | authors = ["the Deno authors"] 10 | license = "MIT" 11 | 12 | [package.metadata.docs.rs] 13 | all-features = true 14 | 15 | [features] 16 | default = ["module_specifier"] 17 | decoding = ["encoding_rs"] 18 | data_url = ["dep:data-url", "url"] 19 | module_specifier = ["dep:data-url", "url"] 20 | 21 | [dependencies] 22 | data-url = { version = "0.3.0", optional = true } 23 | encoding_rs = { version = "0.8.33", optional = true } 24 | serde = { version = "1.0.130", features = ["derive", "rc"] } 25 | url = { version = "2.3.1", optional = true } 26 | 27 | [dev-dependencies] 28 | pretty_assertions = "1.0.0" 29 | serde_json = { version = "1.0.67", features = ["preserve_order"] } 30 | -------------------------------------------------------------------------------- /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | name: release 2 | 3 | on: 4 | workflow_dispatch: 5 | inputs: 6 | releaseKind: 7 | description: 'Kind of release' 8 | default: 'minor' 9 | type: choice 10 | options: 11 | - patch 12 | - minor 13 | required: true 14 | 15 | jobs: 16 | rust: 17 | name: release 18 | runs-on: ubuntu-latest 19 | timeout-minutes: 30 20 | 21 | steps: 22 | - name: Clone repository 23 | uses: actions/checkout@v4 24 | with: 25 | token: ${{ secrets.DENOBOT_PAT }} 26 | 27 | - uses: denoland/setup-deno@v2 28 | - uses: dtolnay/rust-toolchain@stable 29 | 30 | - name: Tag and release 31 | env: 32 | GITHUB_TOKEN: ${{ secrets.DENOBOT_PAT }} 33 | GH_WORKFLOW_ACTOR: ${{ 
github.actor }} 34 | run: | 35 | git config user.email "denobot@users.noreply.github.com" 36 | git config user.name "denobot" 37 | deno run -A https://raw.githubusercontent.com/denoland/automation/0.15.0/tasks/publish_release.ts --${{github.event.inputs.releaseKind}} 38 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018-2025 the Deno authors 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: ci 2 | 3 | on: 4 | pull_request: 5 | branches: [main] 6 | push: 7 | branches: [main] 8 | tags: 9 | - '*' 10 | workflow_dispatch: 11 | 12 | jobs: 13 | rust: 14 | name: deno_media_type-ubuntu-latest-release 15 | runs-on: ubuntu-latest 16 | timeout-minutes: 30 17 | 18 | env: 19 | CARGO_INCREMENTAL: 0 20 | GH_ACTIONS: 1 21 | RUST_BACKTRACE: full 22 | RUSTFLAGS: -D warnings 23 | 24 | steps: 25 | - name: Clone repository 26 | uses: actions/checkout@v4 27 | 28 | - uses: dsherret/rust-toolchain-file@v1 29 | 30 | - name: Cache 31 | uses: Swatinem/rust-cache@v2 32 | with: 33 | save-if: ${{ github.ref == 'refs/heads/main' }} 34 | 35 | - name: Format 36 | run: cargo fmt --all -- --check 37 | 38 | - name: Lint 39 | run: cargo clippy --all-targets --all-features 40 | 41 | - name: Build 42 | run: cargo build --all-targets --all-features 43 | - name: Build --no-default-features 44 | run: cargo build --no-default-features 45 | - name: Build (data_url) 46 | run: cargo build --no-default-features --features data_url 47 | - name: Build (decoding) 48 | run: cargo build --no-default-features --features decoding 49 | - name: Test 50 | run: cargo test --all-targets --all-features 51 | 52 | - name: Publish 53 | if: | 54 | github.repository == 'denoland/deno_media_type' && 55 | startsWith(github.ref, 'refs/tags/') 56 | env: 57 | CARGO_REGISTRY_TOKEN: ${{ secrets.CARGO_REGISTRY_TOKEN }} 58 | run: | 59 | cargo publish 60 | -------------------------------------------------------------------------------- /src/data_url.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2018-2025 the Deno authors. MIT license. 
2 | 3 | use url::Url; 4 | 5 | use crate::MediaType; 6 | 7 | pub fn get_mime_type_charset(mime_type: &str) -> Option<&str> { 8 | mime_type 9 | .split(';') 10 | .skip(1) 11 | .map(str::trim) 12 | .find_map(|s| s.strip_prefix("charset=")) 13 | } 14 | 15 | #[derive(Debug, Clone)] 16 | pub struct RawDataUrl { 17 | pub mime_type: String, 18 | pub bytes: Vec, 19 | } 20 | 21 | impl RawDataUrl { 22 | pub fn parse(specifier: &Url) -> Result { 23 | use std::io::Error; 24 | use std::io::ErrorKind; 25 | 26 | fn unable_to_decode() -> Error { 27 | Error::new(ErrorKind::InvalidData, "Unable to decode data url.") 28 | } 29 | 30 | let url = data_url::DataUrl::process(specifier.as_str()) 31 | .map_err(|_| unable_to_decode())?; 32 | let (bytes, _) = url.decode_to_vec().map_err(|_| unable_to_decode())?; 33 | Ok(RawDataUrl { 34 | mime_type: url.mime_type().to_string(), 35 | bytes, 36 | }) 37 | } 38 | 39 | pub fn charset(&self) -> Option<&str> { 40 | get_mime_type_charset(&self.mime_type) 41 | } 42 | 43 | pub fn media_type(&self) -> MediaType { 44 | let mut content_types = self.mime_type.split(';'); 45 | let Some(content_type) = content_types.next() else { 46 | return MediaType::Unknown; 47 | }; 48 | MediaType::from_content_type( 49 | // this data url will be ignored when resolving the MediaType 50 | // as in this rare case the MediaType is determined solely based 51 | // on the provided content type 52 | &Url::parse("data:image/png;base64,").unwrap(), 53 | content_type, 54 | ) 55 | } 56 | 57 | #[cfg(feature = "decoding")] 58 | pub fn decode(self) -> Result { 59 | let charset = get_mime_type_charset(&self.mime_type).unwrap_or("utf-8"); 60 | crate::encoding::decode_owned_source(charset, self.bytes) 61 | } 62 | 63 | pub fn into_bytes_and_mime_type(self) -> (Vec, String) { 64 | (self.bytes, self.mime_type) 65 | } 66 | } 67 | 68 | #[cfg(test)] 69 | mod test { 70 | use super::*; 71 | 72 | #[test] 73 | fn test_parse_valid_data_url() { 74 | let valid_data_url = 
"data:text/plain;base64,SGVsbG8sIFdvcmxkIQ=="; 75 | let specifier = Url::parse(valid_data_url).unwrap(); 76 | let raw_data_url = RawDataUrl::parse(&specifier).unwrap(); 77 | assert_eq!(raw_data_url.mime_type, "text/plain"); 78 | assert_eq!(raw_data_url.bytes, b"Hello, World!"); 79 | } 80 | 81 | #[test] 82 | fn test_charset_with_valid_mime_type() { 83 | let raw_data_url = RawDataUrl { 84 | mime_type: "text/plain; charset=utf-8".to_string(), 85 | bytes: vec![], 86 | }; 87 | assert_eq!(raw_data_url.charset(), Some("utf-8")); 88 | } 89 | 90 | #[test] 91 | fn test_charset_with_no_charset_in_mime_type() { 92 | let raw_data_url = RawDataUrl { 93 | mime_type: "text/plain".to_string(), 94 | bytes: vec![], 95 | }; 96 | assert_eq!(raw_data_url.charset(), None); 97 | } 98 | 99 | #[test] 100 | fn test_media_type_with_known_type() { 101 | let raw_data_url = RawDataUrl { 102 | mime_type: "application/javascript;charset=utf-8".to_string(), 103 | bytes: vec![], 104 | }; 105 | assert_eq!(raw_data_url.media_type(), MediaType::JavaScript); 106 | } 107 | 108 | #[test] 109 | fn test_media_type_with_unknown_type() { 110 | let raw_data_url = RawDataUrl { 111 | mime_type: "unknown/unknown".to_string(), 112 | bytes: vec![], 113 | }; 114 | assert_eq!(raw_data_url.media_type(), MediaType::Unknown); 115 | } 116 | 117 | #[test] 118 | fn test_decode_with_valid_charset() { 119 | let raw_data_url = RawDataUrl { 120 | mime_type: "text/plain; charset=utf-8".to_string(), 121 | bytes: "Hello, World!".as_bytes().to_vec(), 122 | }; 123 | assert_eq!(raw_data_url.decode().unwrap(), "Hello, World!"); 124 | } 125 | 126 | #[test] 127 | fn test_decode_with_invalid_charset() { 128 | let raw_data_url = RawDataUrl { 129 | mime_type: "text/plain; charset=invalid-charset".to_string(), 130 | bytes: vec![], 131 | }; 132 | assert!(raw_data_url.decode().is_err()); 133 | } 134 | } 135 | -------------------------------------------------------------------------------- /Cargo.lock: 
-------------------------------------------------------------------------------- 1 | # This file is automatically @generated by Cargo. 2 | # It is not intended for manual editing. 3 | version = 4 4 | 5 | [[package]] 6 | name = "cfg-if" 7 | version = "1.0.0" 8 | source = "registry+https://github.com/rust-lang/crates.io-index" 9 | checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" 10 | 11 | [[package]] 12 | name = "data-url" 13 | version = "0.3.0" 14 | source = "registry+https://github.com/rust-lang/crates.io-index" 15 | checksum = "41b319d1b62ffbd002e057f36bebd1f42b9f97927c9577461d855f3513c4289f" 16 | 17 | [[package]] 18 | name = "deno_media_type" 19 | version = "0.3.3" 20 | dependencies = [ 21 | "data-url", 22 | "encoding_rs", 23 | "pretty_assertions", 24 | "serde", 25 | "serde_json", 26 | "url", 27 | ] 28 | 29 | [[package]] 30 | name = "diff" 31 | version = "0.1.13" 32 | source = "registry+https://github.com/rust-lang/crates.io-index" 33 | checksum = "56254986775e3233ffa9c4d7d3faaf6d36a2c09d30b20687e9f88bc8bafc16c8" 34 | 35 | [[package]] 36 | name = "encoding_rs" 37 | version = "0.8.35" 38 | source = "registry+https://github.com/rust-lang/crates.io-index" 39 | checksum = "75030f3c4f45dafd7586dd6780965a8c7e8e285a5ecb86713e63a79c5b2766f3" 40 | dependencies = [ 41 | "cfg-if", 42 | ] 43 | 44 | [[package]] 45 | name = "equivalent" 46 | version = "1.0.1" 47 | source = "registry+https://github.com/rust-lang/crates.io-index" 48 | checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" 49 | 50 | [[package]] 51 | name = "form_urlencoded" 52 | version = "1.2.0" 53 | source = "registry+https://github.com/rust-lang/crates.io-index" 54 | checksum = "a62bc1cf6f830c2ec14a513a9fb124d0a213a629668a4186f329db21fe045652" 55 | dependencies = [ 56 | "percent-encoding", 57 | ] 58 | 59 | [[package]] 60 | name = "hashbrown" 61 | version = "0.14.0" 62 | source = "registry+https://github.com/rust-lang/crates.io-index" 63 | checksum = 
"2c6201b9ff9fd90a5a3bac2e56a830d0caa509576f0e503818ee82c181b3437a" 64 | 65 | [[package]] 66 | name = "idna" 67 | version = "0.4.0" 68 | source = "registry+https://github.com/rust-lang/crates.io-index" 69 | checksum = "7d20d6b07bfbc108882d88ed8e37d39636dcc260e15e30c45e6ba089610b917c" 70 | dependencies = [ 71 | "unicode-bidi", 72 | "unicode-normalization", 73 | ] 74 | 75 | [[package]] 76 | name = "indexmap" 77 | version = "2.0.0" 78 | source = "registry+https://github.com/rust-lang/crates.io-index" 79 | checksum = "d5477fe2230a79769d8dc68e0eabf5437907c0457a5614a9e8dddb67f65eb65d" 80 | dependencies = [ 81 | "equivalent", 82 | "hashbrown", 83 | ] 84 | 85 | [[package]] 86 | name = "itoa" 87 | version = "1.0.9" 88 | source = "registry+https://github.com/rust-lang/crates.io-index" 89 | checksum = "af150ab688ff2122fcef229be89cb50dd66af9e01a4ff320cc137eecc9bacc38" 90 | 91 | [[package]] 92 | name = "percent-encoding" 93 | version = "2.3.0" 94 | source = "registry+https://github.com/rust-lang/crates.io-index" 95 | checksum = "9b2a4787296e9989611394c33f193f676704af1686e70b8f8033ab5ba9a35a94" 96 | 97 | [[package]] 98 | name = "pretty_assertions" 99 | version = "1.4.0" 100 | source = "registry+https://github.com/rust-lang/crates.io-index" 101 | checksum = "af7cee1a6c8a5b9208b3cb1061f10c0cb689087b3d8ce85fb9d2dd7a29b6ba66" 102 | dependencies = [ 103 | "diff", 104 | "yansi", 105 | ] 106 | 107 | [[package]] 108 | name = "proc-macro2" 109 | version = "1.0.66" 110 | source = "registry+https://github.com/rust-lang/crates.io-index" 111 | checksum = "18fb31db3f9bddb2ea821cde30a9f70117e3f119938b5ee630b7403aa6e2ead9" 112 | dependencies = [ 113 | "unicode-ident", 114 | ] 115 | 116 | [[package]] 117 | name = "quote" 118 | version = "1.0.33" 119 | source = "registry+https://github.com/rust-lang/crates.io-index" 120 | checksum = "5267fca4496028628a95160fc423a33e8b2e6af8a5302579e322e4b520293cae" 121 | dependencies = [ 122 | "proc-macro2", 123 | ] 124 | 125 | [[package]] 126 | name = "ryu" 127 | 
version = "1.0.15" 128 | source = "registry+https://github.com/rust-lang/crates.io-index" 129 | checksum = "1ad4cc8da4ef723ed60bced201181d83791ad433213d8c24efffda1eec85d741" 130 | 131 | [[package]] 132 | name = "serde" 133 | version = "1.0.186" 134 | source = "registry+https://github.com/rust-lang/crates.io-index" 135 | checksum = "9f5db24220c009de9bd45e69fb2938f4b6d2df856aa9304ce377b3180f83b7c1" 136 | dependencies = [ 137 | "serde_derive", 138 | ] 139 | 140 | [[package]] 141 | name = "serde_derive" 142 | version = "1.0.186" 143 | source = "registry+https://github.com/rust-lang/crates.io-index" 144 | checksum = "5ad697f7e0b65af4983a4ce8f56ed5b357e8d3c36651bf6a7e13639c17b8e670" 145 | dependencies = [ 146 | "proc-macro2", 147 | "quote", 148 | "syn", 149 | ] 150 | 151 | [[package]] 152 | name = "serde_json" 153 | version = "1.0.105" 154 | source = "registry+https://github.com/rust-lang/crates.io-index" 155 | checksum = "693151e1ac27563d6dbcec9dee9fbd5da8539b20fa14ad3752b2e6d363ace360" 156 | dependencies = [ 157 | "indexmap", 158 | "itoa", 159 | "ryu", 160 | "serde", 161 | ] 162 | 163 | [[package]] 164 | name = "syn" 165 | version = "2.0.29" 166 | source = "registry+https://github.com/rust-lang/crates.io-index" 167 | checksum = "c324c494eba9d92503e6f1ef2e6df781e78f6a7705a0202d9801b198807d518a" 168 | dependencies = [ 169 | "proc-macro2", 170 | "quote", 171 | "unicode-ident", 172 | ] 173 | 174 | [[package]] 175 | name = "tinyvec" 176 | version = "1.6.0" 177 | source = "registry+https://github.com/rust-lang/crates.io-index" 178 | checksum = "87cc5ceb3875bb20c2890005a4e226a4651264a5c75edb2421b52861a0a0cb50" 179 | dependencies = [ 180 | "tinyvec_macros", 181 | ] 182 | 183 | [[package]] 184 | name = "tinyvec_macros" 185 | version = "0.1.1" 186 | source = "registry+https://github.com/rust-lang/crates.io-index" 187 | checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" 188 | 189 | [[package]] 190 | name = "unicode-bidi" 191 | version = "0.3.13" 192 | 
source = "registry+https://github.com/rust-lang/crates.io-index" 193 | checksum = "92888ba5573ff080736b3648696b70cafad7d250551175acbaa4e0385b3e1460" 194 | 195 | [[package]] 196 | name = "unicode-ident" 197 | version = "1.0.11" 198 | source = "registry+https://github.com/rust-lang/crates.io-index" 199 | checksum = "301abaae475aa91687eb82514b328ab47a211a533026cb25fc3e519b86adfc3c" 200 | 201 | [[package]] 202 | name = "unicode-normalization" 203 | version = "0.1.22" 204 | source = "registry+https://github.com/rust-lang/crates.io-index" 205 | checksum = "5c5713f0fc4b5db668a2ac63cdb7bb4469d8c9fed047b1d0292cc7b0ce2ba921" 206 | dependencies = [ 207 | "tinyvec", 208 | ] 209 | 210 | [[package]] 211 | name = "url" 212 | version = "2.4.0" 213 | source = "registry+https://github.com/rust-lang/crates.io-index" 214 | checksum = "50bff7831e19200a85b17131d085c25d7811bc4e186efdaf54bbd132994a88cb" 215 | dependencies = [ 216 | "form_urlencoded", 217 | "idna", 218 | "percent-encoding", 219 | ] 220 | 221 | [[package]] 222 | name = "yansi" 223 | version = "0.5.1" 224 | source = "registry+https://github.com/rust-lang/crates.io-index" 225 | checksum = "09041cd90cf85f7f8b2df60c646f853b7f535ce68f85244eb6731cf89fa498ec" 226 | -------------------------------------------------------------------------------- /src/encoding.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2018-2025 the Deno authors. MIT license. 2 | 3 | pub const BOM_CHAR: char = '\u{FEFF}'; 4 | 5 | /// Strips the byte order mark if it exists from the provided text. 6 | pub fn strip_bom_mut(text: &mut String) { 7 | if text.starts_with(BOM_CHAR) { 8 | text.drain(..BOM_CHAR.len_utf8()); 9 | } 10 | } 11 | 12 | /// Attempts to detect the character encoding of the provided bytes. 13 | /// 14 | /// Supports UTF-8, UTF-16 Little Endian and UTF-16 Big Endian. 
15 | #[cfg(feature = "url")] 16 | pub fn detect_charset(specifier: &url::Url, bytes: &'_ [u8]) -> &'static str { 17 | if specifier.scheme() == "file" { 18 | detect_charset_local_file(bytes) 19 | } else { 20 | "utf-8" 21 | } 22 | } 23 | 24 | /// Attempts to detect the character encoding of the provided bytes 25 | /// from a local file. This should NOT be used for remote bytes. Use 26 | /// `detect_charset` for that. 27 | /// 28 | /// Supports UTF-8, UTF-16 Little Endian and UTF-16 Big Endian. 29 | pub fn detect_charset_local_file(bytes: &'_ [u8]) -> &'static str { 30 | const UTF16_LE_BOM: &[u8] = b"\xFF\xFE"; 31 | const UTF16_BE_BOM: &[u8] = b"\xFE\xFF"; 32 | 33 | if bytes.starts_with(UTF16_LE_BOM) { 34 | "utf-16le" 35 | } else if bytes.starts_with(UTF16_BE_BOM) { 36 | "utf-16be" 37 | } else { 38 | // Assume everything else is utf-8 39 | "utf-8" 40 | } 41 | } 42 | 43 | #[cfg(feature = "decoding")] 44 | pub fn decode_owned_source( 45 | charset: &str, 46 | bytes: Vec, 47 | ) -> Result { 48 | match convert_to_utf8(&bytes, charset)? { 49 | std::borrow::Cow::Borrowed(text) => { 50 | if text.starts_with(BOM_CHAR) { 51 | Ok(text[BOM_CHAR.len_utf8()..].to_string()) 52 | } else { 53 | Ok( 54 | // SAFETY: we know it's a valid utf-8 string at this point 55 | unsafe { String::from_utf8_unchecked(bytes) }, 56 | ) 57 | } 58 | } 59 | std::borrow::Cow::Owned(mut text) => { 60 | strip_bom_mut(&mut text); 61 | Ok(text) 62 | } 63 | } 64 | } 65 | 66 | /// Decodes the source bytes into a string handling any encoding rules 67 | /// for local vs remote files and dealing with the charset. 68 | #[cfg(feature = "decoding")] 69 | pub fn decode_arc_source( 70 | charset: &str, 71 | bytes: std::sync::Arc<[u8]>, 72 | ) -> Result, std::io::Error> { 73 | decode_arc_source_detail(charset, bytes).map(|d| d.text) 74 | } 75 | 76 | #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] 77 | pub enum DecodedArcSourceDetailKind { 78 | /// Data in the original `Arc<[u8]>` is equal to the `Arc`. 
79 | Unchanged, 80 | /// Data in the `Arc<[u8]>` lost information when decoding to an `Arc`. 81 | Changed, 82 | /// Data in the `Arc<[u8]>` only had the UTF-8 BOM stripped. 83 | OnlyUtf8Bom, 84 | } 85 | 86 | #[cfg(feature = "decoding")] 87 | pub struct DecodedArcSourceDetail { 88 | pub text: std::sync::Arc, 89 | pub kind: DecodedArcSourceDetailKind, 90 | } 91 | 92 | /// Decodes the source bytes into a string handling any encoding rules 93 | /// for local vs remote files and dealing with the charset and returns 94 | /// the original bytes. 95 | /// 96 | /// Note: The text and bytes will point at the same data when no decoding 97 | /// is necessary. 98 | #[cfg(feature = "decoding")] 99 | pub fn decode_arc_source_detail( 100 | charset: &str, 101 | bytes: std::sync::Arc<[u8]>, 102 | ) -> Result { 103 | use std::sync::Arc; 104 | 105 | let (kind, text) = match convert_to_utf8(bytes.as_ref(), charset)? { 106 | std::borrow::Cow::Borrowed(text) => { 107 | if text.starts_with(BOM_CHAR) { 108 | ( 109 | DecodedArcSourceDetailKind::OnlyUtf8Bom, 110 | text[BOM_CHAR.len_utf8()..].to_string(), 111 | ) 112 | } else { 113 | return Ok(DecodedArcSourceDetail { 114 | kind: DecodedArcSourceDetailKind::Unchanged, 115 | // SAFETY: we know it's a valid utf-8 string at this point 116 | text: unsafe { 117 | let raw_ptr = Arc::into_raw(bytes); 118 | Arc::from_raw(std::mem::transmute::<*const [u8], *const str>( 119 | raw_ptr, 120 | )) 121 | }, 122 | }); 123 | } 124 | } 125 | std::borrow::Cow::Owned(mut text) => { 126 | strip_bom_mut(&mut text); 127 | (DecodedArcSourceDetailKind::Changed, text) 128 | } 129 | }; 130 | let text: Arc = Arc::from(text); 131 | Ok(DecodedArcSourceDetail { text, kind }) 132 | } 133 | 134 | /// Attempts to convert the provided bytes to a UTF-8 string. 135 | /// 136 | /// Supports all encodings supported by the encoding_rs crate, which includes 137 | /// all encodings specified in the WHATWG Encoding Standard, and only those 138 | /// encodings (see: ). 
139 | #[cfg(feature = "decoding")] 140 | pub fn convert_to_utf8<'a>( 141 | bytes: &'a [u8], 142 | charset: &'_ str, 143 | ) -> Result, std::io::Error> { 144 | match encoding_rs::Encoding::for_label(charset.as_bytes()) { 145 | Some(encoding) => Ok(encoding.decode_without_bom_handling(bytes).0), 146 | None => Err(std::io::Error::new( 147 | std::io::ErrorKind::InvalidInput, 148 | format!("Unsupported charset: {charset}"), 149 | )), 150 | } 151 | } 152 | 153 | #[cfg(test)] 154 | mod test { 155 | 156 | use super::*; 157 | 158 | #[cfg(feature = "url")] 159 | mod detection_tests { 160 | use super::*; 161 | 162 | fn run_detection_test(test_data: &[u8], expected_charset: &str) { 163 | let detected_charset = detect_charset( 164 | &url::Url::parse("file:///file.txt").unwrap(), 165 | test_data, 166 | ); 167 | assert_eq!( 168 | expected_charset.to_lowercase(), 169 | detected_charset.to_lowercase() 170 | ); 171 | } 172 | 173 | #[test] 174 | fn run_detection_test_utf8_no_bom() { 175 | let test_data = "Hello UTF-8 it is \u{23F0} for Deno!" 
176 | .to_owned() 177 | .into_bytes(); 178 | run_detection_test(&test_data, "utf-8"); 179 | } 180 | 181 | #[test] 182 | fn run_detection_test_utf16_little_endian() { 183 | let test_data = b"\xFF\xFEHello UTF-16LE".to_owned().to_vec(); 184 | run_detection_test(&test_data, "utf-16le"); 185 | } 186 | 187 | #[test] 188 | fn run_detection_test_utf16_big_endian() { 189 | let test_data = b"\xFE\xFFHello UTF-16BE".to_owned().to_vec(); 190 | run_detection_test(&test_data, "utf-16be"); 191 | } 192 | } 193 | 194 | #[test] 195 | fn strip_bom_mut_with_bom() { 196 | let mut text = format!("{BOM_CHAR}text"); 197 | strip_bom_mut(&mut text); 198 | assert_eq!(text, "text"); 199 | } 200 | 201 | #[test] 202 | fn strip_bom_mut_without_bom() { 203 | let mut text = "text".to_string(); 204 | strip_bom_mut(&mut text); 205 | assert_eq!(text, "text"); 206 | } 207 | 208 | #[cfg(feature = "decoding")] 209 | #[test] 210 | fn test_decoding_unsupported_charset() { 211 | let test_data = Vec::new(); 212 | let result = convert_to_utf8(&test_data, "utf-32le"); 213 | assert!(result.is_err()); 214 | let err = result.expect_err("Err expected"); 215 | assert!(err.kind() == std::io::ErrorKind::InvalidInput); 216 | } 217 | 218 | #[cfg(feature = "decoding")] 219 | #[test] 220 | fn test_decoding_invalid_utf8() { 221 | let test_data = b"\xFE\xFE\xFF\xFF".to_vec(); 222 | let result = convert_to_utf8(&test_data, "utf-8"); 223 | assert!(result.is_ok()); 224 | } 225 | 226 | #[cfg(feature = "decoding")] 227 | #[test] 228 | fn test_decode_owned_with_bom() { 229 | let bytes = format!("{}{}", BOM_CHAR, "Hello").into_bytes(); 230 | let text = decode_owned_source( 231 | detect_charset(&url::Url::parse("file:///file.txt").unwrap(), &bytes), 232 | bytes, 233 | ) 234 | .unwrap(); 235 | assert_eq!(text, "Hello"); 236 | } 237 | 238 | #[cfg(feature = "decoding")] 239 | #[test] 240 | fn test_decode_with_charset_with_bom() { 241 | let bytes = format!("{}{}", BOM_CHAR, "Hello").into_bytes(); 242 | let charset = "utf-8"; 243 | 
let detail = 244 | decode_arc_source_detail(charset, std::sync::Arc::from(bytes)).unwrap(); 245 | assert_eq!(detail.text.as_ref(), "Hello"); 246 | assert_eq!(detail.kind, DecodedArcSourceDetailKind::OnlyUtf8Bom); 247 | } 248 | 249 | #[cfg(feature = "decoding")] 250 | #[test] 251 | fn test_decode_with_charset_changed() { 252 | let bytes = vec![0x48, 0x65, 0xFF, 0x6C, 0x6F]; 253 | let charset = "utf-8"; 254 | let detail = 255 | decode_arc_source_detail(charset, std::sync::Arc::from(bytes)).unwrap(); 256 | assert_eq!(detail.text.as_ref(), "He�lo"); 257 | assert_eq!(detail.kind, DecodedArcSourceDetailKind::Changed); 258 | } 259 | 260 | #[cfg(feature = "decoding")] 261 | #[test] 262 | fn test_decode_with_charset_no_change() { 263 | let bytes = "Hello".to_string().into_bytes(); 264 | let charset = "utf-8"; 265 | let detail = 266 | decode_arc_source_detail(charset, std::sync::Arc::from(bytes)).unwrap(); 267 | assert_eq!(detail.text.as_ref(), "Hello"); 268 | assert_eq!(detail.kind, DecodedArcSourceDetailKind::Unchanged); 269 | } 270 | } 271 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2018-2025 the Deno authors. MIT license. 2 | 3 | #![deny(clippy::print_stderr)] 4 | #![deny(clippy::print_stdout)] 5 | 6 | use serde::Serialize; 7 | use serde::Serializer; 8 | use std::fmt; 9 | use std::path::Path; 10 | 11 | #[cfg(feature = "data_url")] 12 | pub mod data_url; 13 | pub mod encoding; 14 | 15 | #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] 16 | pub enum MediaType { 17 | JavaScript, 18 | Jsx, 19 | Mjs, 20 | Cjs, 21 | TypeScript, 22 | Mts, 23 | Cts, 24 | Dts, 25 | Dmts, 26 | Dcts, 27 | Tsx, 28 | Css, 29 | Json, 30 | Jsonc, 31 | Json5, 32 | Html, 33 | Sql, 34 | Wasm, 35 | SourceMap, 36 | Unknown, 37 | } 38 | 39 | impl MediaType { 40 | /// Convert a MediaType to a `ts.Extension`. 
41 | /// 42 | /// *NOTE* This is defined in TypeScript as a string based enum. Changes to 43 | /// that enum in TypeScript should be reflected here. 44 | pub fn as_ts_extension(&self) -> &'static str { 45 | match self { 46 | Self::JavaScript => ".js", 47 | Self::Jsx => ".jsx", 48 | Self::Mjs => ".mjs", 49 | Self::Cjs => ".cjs", 50 | Self::TypeScript => ".ts", 51 | Self::Mts => ".mts", 52 | Self::Cts => ".cts", 53 | Self::Dts => ".d.ts", 54 | Self::Dmts => ".d.mts", 55 | Self::Dcts => ".d.cts", 56 | Self::Tsx => ".tsx", 57 | Self::Css => ".css", 58 | Self::Json => ".json", 59 | Self::Jsonc => ".json", 60 | Self::Json5 => ".json", 61 | Self::Html => ".html", 62 | Self::Sql => ".sql", 63 | // We transform Wasm to a declaration file. 64 | Self::Wasm => ".d.mts", 65 | // TypeScript doesn't have an "source map", so we will treat SourceMap as 66 | // JS for mapping purposes, though in reality, it is unlikely to ever be 67 | // passed to the compiler. 68 | Self::SourceMap => ".js", 69 | // TypeScript doesn't have an "unknown", so we will treat unknowns as JS 70 | // for mapping purposes, though in reality, it is unlikely to ever be 71 | // passed to the compiler. 72 | Self::Unknown => ".js", 73 | } 74 | } 75 | 76 | /// Returns `None` only for `MediaType::Unknown`. 77 | /// There is no 1:1 mapping between content types and MediaType. 
78 | /// Specifically, for some `MediaType m` 79 | /// ```ignore 80 | /// MediaType::from_content_type(module_specifier, m.as_content_type()) != m 81 | /// ``` 82 | pub fn as_content_type(&self) -> Option<&'static str> { 83 | // https://www.iana.org/assignments/media-types/media-types.xhtml 84 | // Web-specific with extensions: https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/MIME_types/Common_types 85 | match self { 86 | Self::JavaScript => Some("text/javascript"), 87 | Self::Jsx => Some("text/jsx"), 88 | Self::Mjs => Some("text/javascript"), 89 | Self::Cjs => Some("text/javascript"), 90 | Self::TypeScript => Some("text/typescript"), 91 | Self::Mts => Some("text/typescript"), 92 | Self::Cts => Some("text/typescript"), 93 | Self::Dts => Some("text/typescript"), 94 | Self::Dmts => Some("text/typescript"), 95 | Self::Dcts => Some("text/typescript"), 96 | Self::Tsx => Some("text/tsx"), 97 | Self::Css => Some("text/css"), 98 | Self::Json => Some("application/json"), 99 | Self::Jsonc => Some("application/jsonc"), 100 | Self::Json5 => Some("application/json5"), 101 | Self::Html => Some("text/html"), 102 | Self::Sql => Some("application/sql"), 103 | Self::Wasm => Some("application/wasm"), 104 | Self::SourceMap => Some("application/json"), 105 | Self::Unknown => None, 106 | } 107 | } 108 | 109 | /// Gets if this media type is for a TypeScript declaration file. 110 | pub fn is_declaration(&self) -> bool { 111 | match self { 112 | Self::Dts | Self::Dmts | Self::Dcts => true, 113 | Self::JavaScript 114 | | Self::Jsx 115 | | Self::Mjs 116 | | Self::Cjs 117 | | Self::TypeScript 118 | | Self::Mts 119 | | Self::Cts 120 | | Self::Tsx 121 | | Self::Css 122 | | Self::Json 123 | | Self::Jsonc 124 | | Self::Json5 125 | | Self::Html 126 | | Self::Sql 127 | | Self::Wasm 128 | | Self::SourceMap 129 | | Self::Unknown => false, 130 | } 131 | } 132 | 133 | /// If the media type can be emitted to JavaScript. 
134 | pub fn is_emittable(&self) -> bool { 135 | match self { 136 | MediaType::TypeScript 137 | | MediaType::Mts 138 | | MediaType::Cts 139 | | MediaType::Jsx 140 | | MediaType::Tsx => true, 141 | MediaType::JavaScript 142 | | MediaType::Mjs 143 | | MediaType::Cjs 144 | | MediaType::Dts 145 | | MediaType::Dmts 146 | | MediaType::Dcts 147 | | MediaType::Css 148 | | MediaType::Json 149 | | MediaType::Jsonc 150 | | MediaType::Json5 151 | | MediaType::Html 152 | | MediaType::Sql 153 | | MediaType::Wasm 154 | | MediaType::SourceMap 155 | | MediaType::Unknown => false, 156 | } 157 | } 158 | 159 | /// Gets if the media type is a `.jsx` or `.tsx` file. 160 | pub fn is_jsx(&self) -> bool { 161 | match self { 162 | Self::Tsx | Self::Jsx => true, 163 | Self::TypeScript 164 | | Self::Mts 165 | | Self::Cts 166 | | Self::Dts 167 | | Self::Dmts 168 | | Self::Dcts 169 | | Self::Json 170 | | Self::Jsonc 171 | | Self::Json5 172 | | Self::Html 173 | | Self::Sql 174 | | Self::Wasm 175 | | Self::JavaScript 176 | | Self::Mjs 177 | | Self::Cjs 178 | | Self::Css 179 | | Self::SourceMap 180 | | Self::Unknown => false, 181 | } 182 | } 183 | 184 | /// Returns true if this media type provides types inherently. 185 | /// 186 | /// Examples are TypeScript, TSX, or DTS files. Wasm and JSON files are also 187 | /// considered typed. 
188 | pub fn is_typed(&self) -> bool { 189 | match self { 190 | Self::TypeScript 191 | | Self::Mts 192 | | Self::Cts 193 | | Self::Dts 194 | | Self::Dmts 195 | | Self::Dcts 196 | | Self::Tsx 197 | | Self::Json 198 | | Self::Jsonc 199 | | Self::Json5 200 | | Self::Wasm => true, 201 | Self::JavaScript 202 | | Self::Jsx 203 | | Self::Mjs 204 | | Self::Cjs 205 | | Self::Css 206 | | Self::Html 207 | | Self::Sql 208 | | Self::SourceMap 209 | | Self::Unknown => false, 210 | } 211 | } 212 | 213 | #[cfg(feature = "url")] 214 | pub fn from_specifier_and_headers( 215 | specifier: &url::Url, 216 | maybe_headers: Option<&std::collections::HashMap>, 217 | ) -> Self { 218 | Self::from_specifier_and_content_type( 219 | specifier, 220 | maybe_headers.and_then(|h| h.get("content-type").map(|v| v.as_str())), 221 | ) 222 | } 223 | 224 | #[cfg(feature = "url")] 225 | pub fn from_specifier_and_content_type( 226 | specifier: &url::Url, 227 | maybe_content_type: Option<&str>, 228 | ) -> Self { 229 | if let Some(content_type) = maybe_content_type { 230 | MediaType::from_content_type(specifier, content_type) 231 | } else { 232 | MediaType::from_specifier(specifier) 233 | } 234 | } 235 | 236 | #[cfg(feature = "url")] 237 | pub fn from_content_type>( 238 | specifier: &url::Url, 239 | content_type: S, 240 | ) -> Self { 241 | let first_part = content_type 242 | .as_ref() 243 | .split(';') 244 | .next() 245 | .unwrap() 246 | .trim() 247 | .to_lowercase(); 248 | match first_part.as_str() { 249 | "application/typescript" 250 | | "text/typescript" 251 | | "video/vnd.dlna.mpeg-tts" 252 | | "video/mp2t" 253 | | "application/x-typescript" => { 254 | // This preserves legacy behavior, where if a file is served with a 255 | // content type of `application/javascript`, but it ends only with a `.ts` 256 | // we will assume that it is JavaScript and not TypeScript, but if it ends 257 | // with `.d.ts` we assume it is Dts. 
258 | // 259 | // This handles situations where the file is transpiled on the server and 260 | // is explicitly providing a media type. 261 | map_js_like_extension(specifier, Self::TypeScript) 262 | } 263 | "application/javascript" 264 | | "text/javascript" 265 | | "application/ecmascript" 266 | | "text/ecmascript" 267 | | "application/x-javascript" 268 | | "application/node" => { 269 | map_js_like_extension(specifier, Self::JavaScript) 270 | } 271 | "text/jscript" => map_js_like_extension(specifier, Self::Jsx), 272 | "text/jsx" => Self::Jsx, 273 | "text/tsx" => Self::Tsx, 274 | "application/json" | "text/json" => Self::Json, 275 | s if s.ends_with("+json") => Self::Json, 276 | "application/jsonc" | "text/jsonc" => Self::Jsonc, 277 | "application/json5" | "text/json5" => Self::Json5, 278 | "application/wasm" => Self::Wasm, 279 | "text/css" => Self::Css, 280 | // Handle plain and possibly webassembly 281 | "text/plain" | "application/octet-stream" 282 | if specifier.scheme() != "data" => 283 | { 284 | Self::from_specifier(specifier) 285 | } 286 | _ => Self::Unknown, 287 | } 288 | } 289 | 290 | pub fn from_path(path: &Path) -> Self { 291 | match path.file_name().and_then(|f| f.to_str()) { 292 | Some(file_name) => Self::from_filename(file_name), 293 | None => Self::Unknown, 294 | } 295 | } 296 | 297 | pub fn from_filename(file_name: &str) -> Self { 298 | fn has_dts_file_stem(file_stem: &str) -> bool { 299 | // .ts files that contain .d. in the file name are always considered a typescript declaration file. 
300 | // See: https://github.com/microsoft/TypeScript/issues/53319#issuecomment-1474174018 301 | file_stem.contains(".d.") 302 | } 303 | 304 | let first_dot_pos = match file_name.rfind('.') { 305 | Some(i) => i, 306 | None => return Self::Unknown, 307 | }; 308 | // using eq_ignore_ascii_case with if/elses seems to be ~40ns 309 | // slower here, so continue to use to_lowercase() 310 | let (file_stem, ext) = file_name.split_at(first_dot_pos + 1); 311 | let ext = ext.to_lowercase(); 312 | match ext.as_str() { 313 | "ts" => { 314 | if has_dts_file_stem(file_stem) { 315 | Self::Dts 316 | } else { 317 | Self::TypeScript 318 | } 319 | } 320 | "mts" => { 321 | if has_dts_file_stem(file_stem) { 322 | Self::Dmts 323 | } else { 324 | Self::Mts 325 | } 326 | } 327 | "cts" => { 328 | if has_dts_file_stem(file_stem) { 329 | Self::Dcts 330 | } else { 331 | Self::Cts 332 | } 333 | } 334 | "tsx" => Self::Tsx, 335 | "js" => Self::JavaScript, 336 | "jsx" => Self::Jsx, 337 | "mjs" => Self::Mjs, 338 | "cjs" => Self::Cjs, 339 | "css" => Self::Css, 340 | "json" => Self::Json, 341 | "jsonc" => Self::Jsonc, 342 | "json5" => Self::Json5, 343 | "wasm" => Self::Wasm, 344 | "map" => Self::SourceMap, 345 | _ => Self::Unknown, 346 | } 347 | } 348 | 349 | #[allow(clippy::should_implement_trait)] 350 | pub fn from_str(path: &str) -> Self { 351 | Self::from_path(Path::new(path)) 352 | } 353 | 354 | #[cfg(feature = "url")] 355 | pub fn from_specifier(specifier: &url::Url) -> MediaType { 356 | use ::data_url::DataUrl; 357 | 358 | if specifier.scheme() == "data" { 359 | if let Ok(data_url) = DataUrl::process(specifier.as_str()) { 360 | Self::from_content_type(specifier, data_url.mime_type().to_string()) 361 | } else { 362 | Self::Unknown 363 | } 364 | } else { 365 | match specifier_file_name(specifier) { 366 | Some(file_name) => Self::from_filename(file_name), 367 | None => Self::Unknown, 368 | } 369 | } 370 | } 371 | } 372 | 373 | impl Default for MediaType { 374 | fn default() -> Self { 375 | 
Self::Unknown 376 | } 377 | } 378 | 379 | impl Serialize for MediaType { 380 | fn serialize(&self, serializer: S) -> Result 381 | where 382 | S: Serializer, 383 | { 384 | Serialize::serialize(&self.to_string(), serializer) 385 | } 386 | } 387 | 388 | impl fmt::Display for MediaType { 389 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 390 | let value = match self { 391 | Self::JavaScript => "JavaScript", 392 | Self::Jsx => "JSX", 393 | Self::Mjs => "Mjs", 394 | Self::Cjs => "Cjs", 395 | Self::TypeScript => "TypeScript", 396 | Self::Mts => "Mts", 397 | Self::Cts => "Cts", 398 | Self::Dts => "Dts", 399 | Self::Dmts => "Dmts", 400 | Self::Dcts => "Dcts", 401 | Self::Tsx => "TSX", 402 | Self::Css => "Css", 403 | Self::Json => "Json", 404 | Self::Jsonc => "Jsonc", 405 | Self::Json5 => "Json5", 406 | Self::Html => "Html", 407 | Self::Sql => "Sql", 408 | Self::Wasm => "Wasm", 409 | Self::SourceMap => "SourceMap", 410 | Self::Unknown => "Unknown", 411 | }; 412 | write!(f, "{}", value) 413 | } 414 | } 415 | 416 | /// Used to augment media types by using the path part of a module specifier to 417 | /// resolve to a more accurate media type. 
418 | #[cfg(feature = "url")] 419 | fn map_js_like_extension( 420 | specifier: &url::Url, 421 | default: MediaType, 422 | ) -> MediaType { 423 | let media_type = match specifier_file_name(specifier) { 424 | Some(file_name) => MediaType::from_filename(file_name), 425 | None => MediaType::Unknown, 426 | }; 427 | match media_type { 428 | MediaType::Jsx 429 | | MediaType::Mjs 430 | | MediaType::Cjs 431 | | MediaType::Tsx 432 | | MediaType::Dts 433 | | MediaType::Dmts 434 | | MediaType::Dcts => media_type, 435 | MediaType::Mts => { 436 | if default == MediaType::JavaScript { 437 | MediaType::Mjs 438 | } else { 439 | MediaType::Mts 440 | } 441 | } 442 | MediaType::Cts => { 443 | if default == MediaType::JavaScript { 444 | MediaType::Cjs 445 | } else { 446 | MediaType::Cts 447 | } 448 | } 449 | MediaType::JavaScript 450 | | MediaType::TypeScript 451 | | MediaType::Css 452 | | MediaType::Json 453 | | MediaType::Jsonc 454 | | MediaType::Json5 455 | | MediaType::Html 456 | | MediaType::Sql 457 | | MediaType::Wasm 458 | | MediaType::SourceMap 459 | | MediaType::Unknown => default, 460 | } 461 | } 462 | 463 | #[cfg(feature = "url")] 464 | fn specifier_file_name(specifier: &url::Url) -> Option<&str> { 465 | let path = specifier.path(); 466 | let path = if path.is_empty() { 467 | // ex. deno://lib.deno.d.ts 468 | specifier.domain()? 469 | } else { 470 | path 471 | }; 472 | let path = path.trim_end_matches('/'); 473 | if path.is_empty() { 474 | None 475 | } else { 476 | match path.rfind('/') { 477 | Some(last_slash_index) => Some(&path[last_slash_index + 1..]), 478 | None => Some(path), 479 | } 480 | } 481 | } 482 | 483 | /// Resolve a media type and optionally the charset from a module specifier and 484 | /// the value of a content type header. 
///
/// The media type is taken from the part before the first `;`. The charset is
/// the value of the first `charset=` parameter after it; the parameter name
/// is matched ASCII case-insensitively and the value is returned exactly as
/// written. Without a content type the specifier alone decides and no charset
/// is returned.
#[cfg(feature = "url")]
pub fn resolve_media_type_and_charset_from_content_type<'a>(
  specifier: &url::Url,
  maybe_content_type: Option<&'a str>,
) -> (MediaType, Option<&'a str>) {
  const CHARSET_PREFIX: &str = "charset=";

  if let Some(content_type) = maybe_content_type {
    let mut content_types = content_type.split(';');
    let media_type = content_types
      .next()
      .map(|content_type| MediaType::from_content_type(specifier, content_type))
      .unwrap_or(MediaType::Unknown);
    let charset = content_types.map(str::trim).find_map(|param| {
      // `get` returns `None` for parameters shorter than the prefix (or
      // when the cut would split a multi-byte character), so the slice
      // below is always in bounds.
      let name = param.get(..CHARSET_PREFIX.len())?;
      name
        .eq_ignore_ascii_case(CHARSET_PREFIX)
        .then(|| &param[CHARSET_PREFIX.len()..])
    });

    (media_type, charset)
  } else {
    (MediaType::from_specifier(specifier), None)
  }
}

#[cfg(test)]
mod tests {
  use super::*;

  #[cfg(feature = "module_specifier")]
  type ModuleSpecifier = url::Url;

  use serde_json::json;

  /// Normalize all intermediate components of the path (ie. remove "./" and "../" components).
  /// Similar to `fs::canonicalize()` but doesn't resolve symlinks.
  ///
  /// Taken from Cargo
  /// https://github.com/rust-lang/cargo/blob/af307a38c20a753ec60f0ad18be5abed3db3c9ac/src/cargo/util/paths.rs#L60-L85
  #[cfg(feature = "module_specifier")]
  fn normalize_path<P: AsRef<Path>>(path: P) -> std::path::PathBuf {
    use std::path::Component;
    use std::path::PathBuf;

    let mut components = path.as_ref().components().peekable();
    let mut ret =
      if let Some(c @ Component::Prefix(..)) = components.peek().cloned() {
        components.next();
        PathBuf::from(c.as_os_str())
      } else {
        PathBuf::new()
      };

    for component in components {
      match component {
        Component::Prefix(..) => unreachable!(),
        Component::RootDir => {
          ret.push(component.as_os_str());
        }
        Component::CurDir => {}
        Component::ParentDir => {
          ret.pop();
        }
        Component::Normal(c) => {
          ret.push(c);
        }
      }
    }
    ret
  }

  /// Returns true if the input string starts with a sequence of characters
  /// that could be a valid URI scheme, like 'https:', 'git+ssh:' or 'data:'.
  ///
  /// According to RFC 3986 (https://tools.ietf.org/html/rfc3986#section-3.1),
  /// a valid scheme has the following format:
  ///   scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
  ///
  /// We additionally require the scheme to be at least 2 characters long,
  /// because otherwise a windows path like c:/foo would be treated as a URL,
  /// while no schemes with a one-letter name actually exist.
  #[cfg(feature = "module_specifier")]
  fn specifier_has_uri_scheme(specifier: &str) -> bool {
    let mut chars = specifier.chars();
    let mut len = 0usize;
    // The first character must be a letter.
    match chars.next() {
      Some(c) if c.is_ascii_alphabetic() => len += 1,
      _ => return false,
    }
    // Second and following characters must be either a letter, number,
    // plus sign, minus sign, or dot.
    loop {
      match chars.next() {
        Some(c) if c.is_ascii_alphanumeric() || "+-.".contains(c) => len += 1,
        Some(':') if len >= 2 => return true,
        _ => return false,
      }
    }
  }

  #[cfg(feature = "module_specifier")]
  fn resolve_url(url_str: &str) -> ModuleSpecifier {
    ModuleSpecifier::parse(url_str).expect("Invalid url.")
  }

  #[cfg(feature = "module_specifier")]
  fn resolve_path(path_str: &str) -> ModuleSpecifier {
    let path = std::env::current_dir().unwrap().join(path_str);
    let path = normalize_path(path);
    ModuleSpecifier::from_file_path(path).expect("Invalid path.")
  }

  #[cfg(feature = "module_specifier")]
  fn resolve_url_or_path(specifier: &str) -> ModuleSpecifier {
    if specifier_has_uri_scheme(specifier) {
      resolve_url(specifier)
    } else {
      resolve_path(specifier)
    }
  }

  #[test]
  fn test_map_file_extension() {
    let fixtures = vec![
      ("file:///a/b/c.ts", MediaType::TypeScript),
      ("foo/bar.ts", MediaType::TypeScript),
      ("foo/bar.TS", MediaType::TypeScript),
      ("foo/bar.mts", MediaType::Mts),
      ("foo/bar.cts", MediaType::Cts),
      ("foo/bar.tsx", MediaType::Tsx),
      ("foo/bar.d.ts", MediaType::Dts),
      ("foo/bar.d.mts", MediaType::Dmts),
      ("foo/bar.d.cts", MediaType::Dcts),
      ("foo/bar.d.css.ts", MediaType::Dts),
      ("foo/bar.js", MediaType::JavaScript),
      ("foo/bar.mjs", MediaType::Mjs),
      ("foo/bar.cjs", MediaType::Cjs),
      ("foo/bar.jsx", MediaType::Jsx),
      ("foo/bar.css", MediaType::Css),
      ("foo/bar.json", MediaType::Json),
      ("foo/bar.jsonc", MediaType::Jsonc),
      ("foo/bar.json5", MediaType::Json5),
      ("foo/bar.wasm", MediaType::Wasm),
      ("foo/bar.js.map", MediaType::SourceMap),
      ("foo/bar.txt", MediaType::Unknown),
    ];

    for (path, expected) in fixtures {
      assert_eq!(
        MediaType::from_path(Path::new(path)),
        expected,
        "path: {}",
        path
      );
      assert_eq!(MediaType::from_str(path), expected);
    }
  }

  #[cfg(feature = "module_specifier")]
  #[test]
  fn test_from_specifier() {
    let fixtures = vec![
      ("file:///a/b/c.ts", MediaType::TypeScript),
      ("file:///a/b/c.mts", MediaType::Mts),
      ("file:///a/b/c.cts", MediaType::Cts),
      ("file:///a/b/c.js", MediaType::JavaScript),
      ("file:///a/b/c.mjs", MediaType::Mjs),
      ("file:///a/b/c.cjs", MediaType::Cjs),
      ("file:///a/b/c.txt", MediaType::Unknown),
      ("file:///lib.deno.d.ts", MediaType::Dts),
      ("file:///lib.deno.d.mts", MediaType::Dmts),
      ("file:///lib.deno.d.cts", MediaType::Dcts),
      ("file:///lib.deno.ts", MediaType::TypeScript),
      ("file:///file.d.css.ts", MediaType::Dts),
      ("file:///deno.js", MediaType::JavaScript),
      ("deno://lib.deno.d.ts", MediaType::Dts),
      ("deno://deno.ts", MediaType::TypeScript),
      ("deno://deno.js", MediaType::JavaScript),
      ("https://deno.land/x/mod.ts", MediaType::TypeScript),
      ("https://deno.land/x/mod.d.ts", MediaType::Dts),
      ("https://deno.land/x/mod.d.mts", MediaType::Dmts),
      ("https://deno.land/x/mod.d.cts", MediaType::Dcts),
      ("https://deno.land/x/mod.js", MediaType::JavaScript),
      ("https://deno.land/x/mod.txt", MediaType::Unknown),
      ("https://deno.land/x/mod.css", MediaType::Css),
      ("https://deno.land/x/mod.json", MediaType::Json),
      ("https://deno.land/x/mod.jsonc", MediaType::Jsonc),
      ("https://deno.land/x/mod.json5", MediaType::Json5),
      ("data:application/typescript;base64,ZXhwb3J0IGNvbnN0IGEgPSAiYSI7CgpleHBvcnQgZW51bSBBIHsKICBBLAogIEIsCiAgQywKfQo=", MediaType::TypeScript),
      ("data:application/javascript;base64,ZXhwb3J0IGNvbnN0IGEgPSAiYSI7CgpleHBvcnQgZW51bSBBIHsKICBBLAogIEIsCiAgQywKfQo=", MediaType::JavaScript),
      ("data:text/plain;base64,ZXhwb3J0IGNvbnN0IGEgPSAiYSI7CgpleHBvcnQgZW51bSBBIHsKICBBLAogIEIsCiAgQywKfQo=", MediaType::Unknown),
    ];

    for (specifier, expected) in fixtures {
      let actual = resolve_url_or_path(specifier);
      assert_eq!(
        MediaType::from_specifier(&actual),
        expected,
        "specifier: {}",
        specifier
      );

      assert_eq!(
        MediaType::from_specifier_and_headers(&actual, None),
        expected
      );
    }
  }

  #[cfg(feature = "module_specifier")]
  #[test]
  fn test_from_content_type() {
    let fixtures = vec![
      (
        "https://deno.land/x/mod.ts",
        "application/typescript",
        MediaType::TypeScript,
      ),
      (
        "https://deno.land/x/mod.ts",
        "application/javascript",
        MediaType::JavaScript,
      ),
      (
        "https://deno.land/x/mod.mts",
        "application/javascript",
        MediaType::Mjs,
      ),
      (
        "https://deno.land/x/mod.cts",
        "application/javascript",
        MediaType::Cjs,
      ),
      (
        "https://deno.land/x/mod.mts",
        "application/typescript",
        MediaType::Mts,
      ),
      (
        "https://deno.land/x/mod.cts",
        "application/typescript",
        MediaType::Cts,
      ),
      (
        "https://deno.land/x/mod.d.ts",
        "application/typescript",
        MediaType::Dts,
      ),
      (
        "https://deno.land/x/mod.d.ts",
        "application/javascript",
        MediaType::Dts,
      ),
      (
        "https://deno.land/x/mod.d.mts",
        "application/typescript",
        MediaType::Dmts,
      ),
      (
        "https://deno.land/x/mod.d.cts",
        "application/typescript",
        MediaType::Dcts,
      ),
      ("https://deno.land/x/mod.tsx", "text/tsx", MediaType::Tsx),
      (
        "https://deno.land/x/mod.js",
        "application/javascript",
        MediaType::JavaScript,
      ),
      (
        "https://deno.land/x/mod.js",
        "application/typescript",
        MediaType::TypeScript,
      ),
      (
        "https://deno.land/x/mod.mjs",
        "application/javascript",
        MediaType::Mjs,
      ),
      (
        "https://deno.land/x/mod.cjs",
        "application/javascript",
        MediaType::Cjs,
      ),
      ("https://deno.land/x/mod.jsx", "text/jsx", MediaType::Jsx),
      (
        "https://deno.land/x/mod.ts",
        "text/plain",
        MediaType::TypeScript,
      ),
      ("https://deno.land/x/mod.mts", "text/plain", MediaType::Mts),
      ("https://deno.land/x/mod.cts", "text/plain", MediaType::Cts),
      (
        "https://deno.land/x/mod.js",
        "text/plain",
        MediaType::JavaScript,
      ),
      (
        "https://deno.land/x/mod.wasm",
        "text/plain",
        MediaType::Wasm,
      ),
      (
        "https://deno.land/x/mod.jsx",
        "text/jscript",
        MediaType::Jsx,
      ),
      ("https://deno.land/x/mod.jsx", "text/css", MediaType::Css),
      (
        "https://deno.land/x/mod.jsx",
        "application/json",
        MediaType::Json,
      ),
      (
        "https://deno.land/x/mod.jsx",
        "application/jsonc",
        MediaType::Jsonc,
      ),
      (
        "https://deno.land/x/mod.jsx",
        "application/json5",
        MediaType::Json5,
      ),
    ];

    for (specifier, content_type, expected) in fixtures {
      let specifier = resolve_url_or_path(specifier);
      assert_eq!(
        MediaType::from_content_type(&specifier, content_type),
        expected,
        "specifier: {}, content_type: {}",
        specifier,
        content_type,
      );

      let mut headers = std::collections::HashMap::<String, String>::new();
      headers.insert("content-type".to_string(), content_type.to_string());
      assert_eq!(
        MediaType::from_specifier_and_headers(&specifier, Some(&headers)),
        expected
      );
    }
  }

  #[test]
  fn test_serialization() {
    assert_eq!(json!(MediaType::JavaScript), json!("JavaScript"));
    assert_eq!(json!(MediaType::Mjs), json!("Mjs"));
    assert_eq!(json!(MediaType::Cjs), json!("Cjs"));
    assert_eq!(json!(MediaType::Jsx), json!("JSX"));
    assert_eq!(json!(MediaType::TypeScript), json!("TypeScript"));
    assert_eq!(json!(MediaType::Mts), json!("Mts"));
    assert_eq!(json!(MediaType::Cts), json!("Cts"));
    assert_eq!(json!(MediaType::Dts), json!("Dts"));
    assert_eq!(json!(MediaType::Dmts), json!("Dmts"));
    assert_eq!(json!(MediaType::Dcts), json!("Dcts"));
    assert_eq!(json!(MediaType::Tsx), json!("TSX"));
    assert_eq!(json!(MediaType::Css), json!("Css"));
    assert_eq!(json!(MediaType::Json), json!("Json"));
    assert_eq!(json!(MediaType::Jsonc), json!("Jsonc"));
    assert_eq!(json!(MediaType::Json5), json!("Json5"));
    assert_eq!(json!(MediaType::Html), json!("Html"));
    assert_eq!(json!(MediaType::Sql), json!("Sql"));
    assert_eq!(json!(MediaType::Wasm), json!("Wasm"));
    assert_eq!(json!(MediaType::SourceMap), json!("SourceMap"));
    assert_eq!(json!(MediaType::Unknown), json!("Unknown"));
  }

  #[test]
  fn test_display() {
    assert_eq!(MediaType::JavaScript.to_string(), "JavaScript");
    assert_eq!(MediaType::Mjs.to_string(), "Mjs");
    assert_eq!(MediaType::Cjs.to_string(), "Cjs");
    assert_eq!(MediaType::Jsx.to_string(), "JSX");
    assert_eq!(MediaType::TypeScript.to_string(), "TypeScript");
    assert_eq!(MediaType::Mts.to_string(), "Mts");
    assert_eq!(MediaType::Cts.to_string(), "Cts");
    assert_eq!(MediaType::Dts.to_string(), "Dts");
    assert_eq!(MediaType::Dmts.to_string(), "Dmts");
    assert_eq!(MediaType::Dcts.to_string(), "Dcts");
    assert_eq!(MediaType::Tsx.to_string(), "TSX");
    assert_eq!(MediaType::Css.to_string(), "Css");
    assert_eq!(MediaType::Json.to_string(), "Json");
    assert_eq!(MediaType::Jsonc.to_string(), "Jsonc");
    assert_eq!(MediaType::Json5.to_string(), "Json5");
    assert_eq!(MediaType::Html.to_string(), "Html");
    assert_eq!(MediaType::Sql.to_string(), "Sql");
    assert_eq!(MediaType::Wasm.to_string(), "Wasm");
    assert_eq!(MediaType::SourceMap.to_string(), "SourceMap");
    assert_eq!(MediaType::Unknown.to_string(), "Unknown");
  }

  #[cfg(feature = "module_specifier")]
  macro_rules! file_url {
    ($path:expr) => {
      if cfg!(target_os = "windows") {
        concat!("file:///C:", $path)
      } else {
        concat!("file://", $path)
      }
    };
  }

  #[cfg(feature = "module_specifier")]
  #[test]
  fn test_resolve_media_type_and_charset_from_content_type() {
    let fixtures = vec![
      // Extension only
      (file_url!("/foo/bar.ts"), None, MediaType::TypeScript, None),
      (file_url!("/foo/bar.tsx"), None, MediaType::Tsx, None),
      (file_url!("/foo/bar.d.cts"), None, MediaType::Dcts, None),
      (file_url!("/foo/bar.d.mts"), None, MediaType::Dmts, None),
      (file_url!("/foo/bar.d.ts"), None, MediaType::Dts, None),
      (file_url!("/foo/bar.js"), None, MediaType::JavaScript, None),
      (file_url!("/foo/bar.jsx"), None, MediaType::Jsx, None),
      (file_url!("/foo/bar.json"), None, MediaType::Json, None),
      (file_url!("/foo/bar.jsonc"), None, MediaType::Jsonc, None),
      (file_url!("/foo/bar.json5"), None, MediaType::Json5, None),
      (file_url!("/foo/bar.wasm"), None, MediaType::Wasm, None),
      (file_url!("/foo/bar.cjs"), None, MediaType::Cjs, None),
      (file_url!("/foo/bar.mjs"), None, MediaType::Mjs, None),
      (file_url!("/foo/bar.cts"), None, MediaType::Cts, None),
      (file_url!("/foo/bar.mts"), None, MediaType::Mts, None),
      (file_url!("/foo/bar"), None, MediaType::Unknown, None),
      // Media type no extension
      (
        "https://deno.land/x/mod",
        Some("application/typescript".to_string()),
        MediaType::TypeScript,
        None,
      ),
      (
        "https://deno.land/x/mod",
        Some("text/typescript".to_string()),
        MediaType::TypeScript,
        None,
      ),
      (
        "https://deno.land/x/mod",
        Some("video/vnd.dlna.mpeg-tts".to_string()),
        MediaType::TypeScript,
        None,
      ),
      (
        "https://deno.land/x/mod",
        Some("video/mp2t".to_string()),
        MediaType::TypeScript,
        None,
      ),
      (
        "https://deno.land/x/mod",
        Some("application/x-typescript".to_string()),
        MediaType::TypeScript,
        None,
      ),
      (
        "https://deno.land/x/mod",
        Some("application/javascript".to_string()),
        MediaType::JavaScript,
        None,
      ),
      (
        "https://deno.land/x/mod",
        Some("text/javascript".to_string()),
        MediaType::JavaScript,
        None,
      ),
      (
        "https://deno.land/x/mod",
        Some("application/ecmascript".to_string()),
        MediaType::JavaScript,
        None,
      ),
      (
        "https://deno.land/x/mod",
        Some("text/ecmascript".to_string()),
        MediaType::JavaScript,
        None,
      ),
      (
        "https://deno.land/x/mod",
        Some("application/x-javascript".to_string()),
        MediaType::JavaScript,
        None,
      ),
      (
        "https://deno.land/x/mod",
        Some("application/node".to_string()),
        MediaType::JavaScript,
        None,
      ),
      (
        "https://deno.land/x/mod",
        Some("text/jsx".to_string()),
        MediaType::Jsx,
        None,
      ),
      (
        "https://deno.land/x/mod",
        Some("text/tsx".to_string()),
        MediaType::Tsx,
        None,
      ),
      (
        "https://deno.land/x/mod",
        Some("text/json".to_string()),
        MediaType::Json,
        None,
      ),
      (
        "https://deno.land/x/mod",
        Some("text/json; charset=utf-8".to_string()),
        MediaType::Json,
        Some("utf-8".to_string()),
      ),
      // The charset parameter name is case-insensitive; the value is
      // returned as written.
      (
        "https://deno.land/x/mod",
        Some("text/json; Charset=UTF-8".to_string()),
        MediaType::Json,
        Some("UTF-8".to_string()),
      ),
      (
        "https://deno.land/x/mod",
        Some("text/json;CHARSET=utf-8".to_string()),
        MediaType::Json,
        Some("utf-8".to_string()),
      ),
      // Parameters shorter than `charset=` are skipped without panicking.
      (
        "https://deno.land/x/mod",
        Some("text/json; q=1".to_string()),
        MediaType::Json,
        None,
      ),
      (
        "https://deno.land/x/mod",
        Some("application/geo+json".to_string()),
        MediaType::Json,
        None,
      ),
      (
        "https://deno.land/x/mod",
        Some("text/jsonc".to_string()),
        MediaType::Jsonc,
        None,
      ),
      (
        "https://deno.land/x/mod",
        Some("text/json5".to_string()),
        MediaType::Json5,
        None,
      ),
      // Extension with media type
      (
        "https://deno.land/x/mod.ts",
        Some("text/plain".to_string()),
        MediaType::TypeScript,
        None,
      ),
      (
        "https://deno.land/x/mod.ts",
        Some("foo/bar".to_string()),
        MediaType::Unknown,
        None,
      ),
      (
        "https://deno.land/x/mod.tsx",
        Some("application/typescript".to_string()),
        MediaType::Tsx,
        None,
      ),
      (
        "https://deno.land/x/mod.tsx",
        Some("application/javascript".to_string()),
        MediaType::Tsx,
        None,
      ),
      (
        "https://deno.land/x/mod.jsx",
        Some("application/javascript".to_string()),
        MediaType::Jsx,
        None,
      ),
      (
        "https://deno.land/x/mod.jsx",
        Some("application/x-typescript".to_string()),
        MediaType::Jsx,
        None,
      ),
      (
        "https://deno.land/x/mod.d.ts",
        Some("application/javascript".to_string()),
        MediaType::Dts,
        None,
      ),
      (
        "https://deno.land/x/mod.d.ts",
        Some("text/plain".to_string()),
        MediaType::Dts,
        None,
      ),
      (
        "https://deno.land/x/mod.d.ts",
        Some("application/x-typescript".to_string()),
        MediaType::Dts,
        None,
      ),
    ];

    for (specifier, maybe_content_type, media_type, maybe_charset) in fixtures {
      let specifier = ModuleSpecifier::parse(specifier).unwrap();
      assert_eq!(
        resolve_media_type_and_charset_from_content_type(
          &specifier,
          maybe_content_type.as_deref()
        ),
        (media_type, maybe_charset.as_deref())
      );
    }
  }
}