feat: add czv, czv-wasm, and czv-python (init release)

This commit is contained in:
rzmk 2024-06-19 22:37:33 -04:00
commit 9799ab694b
No known key found for this signature in database
40 changed files with 70383 additions and 0 deletions

1
.github/FUNDING.yml vendored Normal file
View file

@ -0,0 +1 @@
github: [rzmk]

1
.gitignore vendored Normal file
View file

@ -0,0 +1 @@
/target

955
Cargo.lock generated Normal file
View file

@ -0,0 +1,955 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 3
[[package]]
name = "aho-corasick"
version = "1.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916"
dependencies = [
"memchr",
]
[[package]]
name = "anes"
version = "0.1.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299"
[[package]]
name = "anyhow"
version = "1.0.86"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b3d1d046238990b9cf5bcde22a3fb3584ee5cf65fb2765f454ed428c7a0063da"
[[package]]
name = "atty"
version = "0.2.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8"
dependencies = [
"hermit-abi",
"libc",
"winapi",
]
[[package]]
name = "autocfg"
version = "1.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0c4b4d0bd25bd0b74681c0ad21497610ce1b7c91b1022cd21c80c6fbdd9476b0"
[[package]]
name = "bitflags"
version = "1.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
[[package]]
name = "bitflags"
version = "2.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cf4b9d6a944f767f8e5e0db018570623c85f3d925ac718db4e06d0187adb21c1"
[[package]]
name = "bumpalo"
version = "3.16.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "79296716171880943b8470b5f8d03aa55eb2e645a4874bdbb28adb49162e012c"
[[package]]
name = "cast"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5"
[[package]]
name = "cfg-if"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]]
name = "ciborium"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "42e69ffd6f0917f5c029256a24d0161db17cea3997d185db0d35926308770f0e"
dependencies = [
"ciborium-io",
"ciborium-ll",
"serde",
]
[[package]]
name = "ciborium-io"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "05afea1e0a06c9be33d539b876f1ce3692f4afea2cb41f740e7743225ed1c757"
[[package]]
name = "ciborium-ll"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "57663b653d948a338bfb3eeba9bb2fd5fcfaecb9e199e87e1eda4d9e8b240fd9"
dependencies = [
"ciborium-io",
"half",
]
[[package]]
name = "clap"
version = "3.2.25"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4ea181bf566f71cb9a5d17a59e1871af638180a18fb0035c92ae62b705207123"
dependencies = [
"bitflags 1.3.2",
"clap_lex",
"indexmap",
"textwrap",
]
[[package]]
name = "clap_lex"
version = "0.2.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2850f2f5a82cbf437dd5af4d49848fbdfc27c157c3d010345776f952765261c5"
dependencies = [
"os_str_bytes",
]
[[package]]
name = "console_error_panic_hook"
version = "0.1.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a06aeb73f470f66dcdbf7223caeebb85984942f22f1adb2a088cf9668146bbbc"
dependencies = [
"cfg-if",
"wasm-bindgen",
]
[[package]]
name = "criterion"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e7c76e09c1aae2bc52b3d2f29e13c6572553b30c4aa1b8a49fd70de6412654cb"
dependencies = [
"anes",
"atty",
"cast",
"ciborium",
"clap",
"criterion-plot",
"itertools",
"lazy_static",
"num-traits",
"oorandom",
"plotters",
"rayon",
"regex",
"serde",
"serde_derive",
"serde_json",
"tinytemplate",
"walkdir",
]
[[package]]
name = "criterion-plot"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6b50826342786a51a89e2da3a28f1c32b06e387201bc2d19791f622c673706b1"
dependencies = [
"cast",
"itertools",
]
[[package]]
name = "crossbeam-deque"
version = "0.8.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "613f8cc01fe9cf1a3eb3d7f488fd2fa8388403e97039e2f73692932e291a770d"
dependencies = [
"crossbeam-epoch",
"crossbeam-utils",
]
[[package]]
name = "crossbeam-epoch"
version = "0.9.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e"
dependencies = [
"crossbeam-utils",
]
[[package]]
name = "crossbeam-utils"
version = "0.8.20"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "22ec99545bb0ed0ea7bb9b8e1e9122ea386ff8a48c0922e43f36d45ab09e0e80"
[[package]]
name = "crunchy"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7"
[[package]]
name = "csv"
version = "1.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ac574ff4d437a7b5ad237ef331c17ccca63c46479e5b5453eb8e10bb99a759fe"
dependencies = [
"csv-core",
"itoa",
"ryu",
"serde",
]
[[package]]
name = "csv-core"
version = "0.1.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5efa2b3d7902f4b634a20cae3c9c4e6209dc4779feb6863329607560143efa70"
dependencies = [
"memchr",
]
[[package]]
name = "czv"
version = "0.0.0"
dependencies = [
"anyhow",
"criterion",
"csv",
"typed-builder",
]
[[package]]
name = "czv-python"
version = "0.0.0"
dependencies = [
"anyhow",
"csv",
"pyo3",
"thiserror",
]
[[package]]
name = "czv-wasm"
version = "0.0.0"
dependencies = [
"anyhow",
"csv",
"thiserror",
"wasm-bindgen",
"wasm-bindgen-test",
]
[[package]]
name = "either"
version = "1.12.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3dca9240753cf90908d7e4aac30f630662b02aebaa1b58a3cadabdb23385b58b"
[[package]]
name = "half"
version = "2.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6dd08c532ae367adf81c312a4580bc67f1d0fe8bc9c460520283f4c0ff277888"
dependencies = [
"cfg-if",
"crunchy",
]
[[package]]
name = "hashbrown"
version = "0.12.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888"
[[package]]
name = "heck"
version = "0.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8"
[[package]]
name = "hermit-abi"
version = "0.1.19"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33"
dependencies = [
"libc",
]
[[package]]
name = "indexmap"
version = "1.9.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99"
dependencies = [
"autocfg",
"hashbrown",
]
[[package]]
name = "indoc"
version = "2.0.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b248f5224d1d606005e02c97f5aa4e88eeb230488bcc03bc9ca4d7991399f2b5"
[[package]]
name = "itertools"
version = "0.10.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473"
dependencies = [
"either",
]
[[package]]
name = "itoa"
version = "1.0.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b"
[[package]]
name = "js-sys"
version = "0.3.69"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "29c15563dc2726973df627357ce0c9ddddbea194836909d655df6a75d2cf296d"
dependencies = [
"wasm-bindgen",
]
[[package]]
name = "lazy_static"
version = "1.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
[[package]]
name = "libc"
version = "0.2.155"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "97b3888a4aecf77e811145cadf6eef5901f4782c53886191b2f693f24761847c"
[[package]]
name = "lock_api"
version = "0.4.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "07af8b9cdd281b7915f413fa73f29ebd5d55d0d3f0155584dade1ff18cea1b17"
dependencies = [
"autocfg",
"scopeguard",
]
[[package]]
name = "log"
version = "0.4.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "90ed8c1e510134f979dbc4f070f87d4313098b704861a105fe34231c70a3901c"
[[package]]
name = "memchr"
version = "2.7.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3"
[[package]]
name = "memoffset"
version = "0.9.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "488016bfae457b036d996092f6cb448677611ce4449e970ceaf42695203f218a"
dependencies = [
"autocfg",
]
[[package]]
name = "num-traits"
version = "0.2.19"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841"
dependencies = [
"autocfg",
]
[[package]]
name = "once_cell"
version = "1.19.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92"
[[package]]
name = "oorandom"
version = "11.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0ab1bc2a289d34bd04a330323ac98a1b4bc82c9d9fcb1e66b63caa84da26b575"
[[package]]
name = "os_str_bytes"
version = "6.6.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e2355d85b9a3786f481747ced0e0ff2ba35213a1f9bd406ed906554d7af805a1"
[[package]]
name = "parking_lot"
version = "0.12.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f1bf18183cf54e8d6059647fc3063646a1801cf30896933ec2311622cc4b9a27"
dependencies = [
"lock_api",
"parking_lot_core",
]
[[package]]
name = "parking_lot_core"
version = "0.9.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1e401f977ab385c9e4e3ab30627d6f26d00e2c73eef317493c4ec6d468726cf8"
dependencies = [
"cfg-if",
"libc",
"redox_syscall",
"smallvec",
"windows-targets",
]
[[package]]
name = "plotters"
version = "0.3.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a15b6eccb8484002195a3e44fe65a4ce8e93a625797a063735536fd59cb01cf3"
dependencies = [
"num-traits",
"plotters-backend",
"plotters-svg",
"wasm-bindgen",
"web-sys",
]
[[package]]
name = "plotters-backend"
version = "0.3.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "414cec62c6634ae900ea1c56128dfe87cf63e7caece0852ec76aba307cebadb7"
[[package]]
name = "plotters-svg"
version = "0.3.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "81b30686a7d9c3e010b84284bdd26a29f2138574f52f5eb6f794fc0ad924e705"
dependencies = [
"plotters-backend",
]
[[package]]
name = "portable-atomic"
version = "1.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7170ef9988bc169ba16dd36a7fa041e5c4cbeb6a35b76d4c03daded371eae7c0"
[[package]]
name = "proc-macro2"
version = "1.0.85"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "22244ce15aa966053a896d1accb3a6e68469b97c7f33f284b99f0d576879fc23"
dependencies = [
"unicode-ident",
]
[[package]]
name = "pyo3"
version = "0.21.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a5e00b96a521718e08e03b1a622f01c8a8deb50719335de3f60b3b3950f069d8"
dependencies = [
"cfg-if",
"indoc",
"libc",
"memoffset",
"parking_lot",
"portable-atomic",
"pyo3-build-config",
"pyo3-ffi",
"pyo3-macros",
"unindent",
]
[[package]]
name = "pyo3-build-config"
version = "0.21.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7883df5835fafdad87c0d888b266c8ec0f4c9ca48a5bed6bbb592e8dedee1b50"
dependencies = [
"once_cell",
"target-lexicon",
]
[[package]]
name = "pyo3-ffi"
version = "0.21.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "01be5843dc60b916ab4dad1dca6d20b9b4e6ddc8e15f50c47fe6d85f1fb97403"
dependencies = [
"libc",
"pyo3-build-config",
]
[[package]]
name = "pyo3-macros"
version = "0.21.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "77b34069fc0682e11b31dbd10321cbf94808394c56fd996796ce45217dfac53c"
dependencies = [
"proc-macro2",
"pyo3-macros-backend",
"quote",
"syn",
]
[[package]]
name = "pyo3-macros-backend"
version = "0.21.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "08260721f32db5e1a5beae69a55553f56b99bd0e1c3e6e0a5e8851a9d0f5a85c"
dependencies = [
"heck",
"proc-macro2",
"pyo3-build-config",
"quote",
"syn",
]
[[package]]
name = "quote"
version = "1.0.36"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0fa76aaf39101c457836aec0ce2316dbdc3ab723cdda1c6bd4e6ad4208acaca7"
dependencies = [
"proc-macro2",
]
[[package]]
name = "rayon"
version = "1.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b418a60154510ca1a002a752ca9714984e21e4241e804d32555251faf8b78ffa"
dependencies = [
"either",
"rayon-core",
]
[[package]]
name = "rayon-core"
version = "1.12.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1465873a3dfdaa8ae7cb14b4383657caab0b3e8a0aa9ae8e04b044854c8dfce2"
dependencies = [
"crossbeam-deque",
"crossbeam-utils",
]
[[package]]
name = "redox_syscall"
version = "0.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c82cf8cff14456045f55ec4241383baeff27af886adb72ffb2162f99911de0fd"
dependencies = [
"bitflags 2.5.0",
]
[[package]]
name = "regex"
version = "1.10.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b91213439dad192326a0d7c6ee3955910425f441d7038e0d6933b0aec5c4517f"
dependencies = [
"aho-corasick",
"memchr",
"regex-automata",
"regex-syntax",
]
[[package]]
name = "regex-automata"
version = "0.4.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "38caf58cc5ef2fed281f89292ef23f6365465ed9a41b7a7754eb4e26496c92df"
dependencies = [
"aho-corasick",
"memchr",
"regex-syntax",
]
[[package]]
name = "regex-syntax"
version = "0.8.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7a66a03ae7c801facd77a29370b4faec201768915ac14a721ba36f20bc9c209b"
[[package]]
name = "ryu"
version = "1.0.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f3cb5ba0dc43242ce17de99c180e96db90b235b8a9fdc9543c96d2209116bd9f"
[[package]]
name = "same-file"
version = "1.0.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502"
dependencies = [
"winapi-util",
]
[[package]]
name = "scoped-tls"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e1cf6437eb19a8f4a6cc0f7dca544973b0b78843adbfeb3683d1a94a0024a294"
[[package]]
name = "scopeguard"
version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
[[package]]
name = "serde"
version = "1.0.203"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7253ab4de971e72fb7be983802300c30b5a7f0c2e56fab8abfc6a214307c0094"
dependencies = [
"serde_derive",
]
[[package]]
name = "serde_derive"
version = "1.0.203"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "500cbc0ebeb6f46627f50f3f5811ccf6bf00643be300b4c3eabc0ef55dc5b5ba"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "serde_json"
version = "1.0.117"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "455182ea6142b14f93f4bc5320a2b31c1f266b66a4a5c858b013302a5d8cbfc3"
dependencies = [
"itoa",
"ryu",
"serde",
]
[[package]]
name = "smallvec"
version = "1.13.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67"
[[package]]
name = "syn"
version = "2.0.66"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c42f3f41a2de00b01c0aaad383c5a45241efc8b2d1eda5661812fda5f3cdcff5"
dependencies = [
"proc-macro2",
"quote",
"unicode-ident",
]
[[package]]
name = "target-lexicon"
version = "0.12.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e1fc403891a21bcfb7c37834ba66a547a8f402146eba7265b5a6d88059c9ff2f"
[[package]]
name = "textwrap"
version = "0.16.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "23d434d3f8967a09480fb04132ebe0a3e088c173e6d0ee7897abbdf4eab0f8b9"
[[package]]
name = "thiserror"
version = "1.0.61"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c546c80d6be4bc6a00c0f01730c08df82eaa7a7a61f11d656526506112cc1709"
dependencies = [
"thiserror-impl",
]
[[package]]
name = "thiserror-impl"
version = "1.0.61"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "46c3384250002a6d5af4d114f2845d37b57521033f30d5c3f46c4d70e1197533"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "tinytemplate"
version = "1.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "be4d6b5f19ff7664e8c98d03e2139cb510db9b0a60b55f8e8709b689d939b6bc"
dependencies = [
"serde",
"serde_json",
]
[[package]]
name = "typed-builder"
version = "0.18.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "77739c880e00693faef3d65ea3aad725f196da38b22fdc7ea6ded6e1ce4d3add"
dependencies = [
"typed-builder-macro",
]
[[package]]
name = "typed-builder-macro"
version = "0.18.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1f718dfaf347dcb5b983bfc87608144b0bad87970aebcbea5ce44d2a30c08e63"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "unicode-ident"
version = "1.0.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b"
[[package]]
name = "unindent"
version = "0.2.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c7de7d73e1754487cb58364ee906a499937a0dfabd86bcb980fa99ec8c8fa2ce"
[[package]]
name = "walkdir"
version = "2.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b"
dependencies = [
"same-file",
"winapi-util",
]
[[package]]
name = "wasm-bindgen"
version = "0.2.92"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4be2531df63900aeb2bca0daaaddec08491ee64ceecbee5076636a3b026795a8"
dependencies = [
"cfg-if",
"wasm-bindgen-macro",
]
[[package]]
name = "wasm-bindgen-backend"
version = "0.2.92"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "614d787b966d3989fa7bb98a654e369c762374fd3213d212cfc0251257e747da"
dependencies = [
"bumpalo",
"log",
"once_cell",
"proc-macro2",
"quote",
"syn",
"wasm-bindgen-shared",
]
[[package]]
name = "wasm-bindgen-futures"
version = "0.4.42"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "76bc14366121efc8dbb487ab05bcc9d346b3b5ec0eaa76e46594cabbe51762c0"
dependencies = [
"cfg-if",
"js-sys",
"wasm-bindgen",
"web-sys",
]
[[package]]
name = "wasm-bindgen-macro"
version = "0.2.92"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a1f8823de937b71b9460c0c34e25f3da88250760bec0ebac694b49997550d726"
dependencies = [
"quote",
"wasm-bindgen-macro-support",
]
[[package]]
name = "wasm-bindgen-macro-support"
version = "0.2.92"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e94f17b526d0a461a191c78ea52bbce64071ed5c04c9ffe424dcb38f74171bb7"
dependencies = [
"proc-macro2",
"quote",
"syn",
"wasm-bindgen-backend",
"wasm-bindgen-shared",
]
[[package]]
name = "wasm-bindgen-shared"
version = "0.2.92"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "af190c94f2773fdb3729c55b007a722abb5384da03bc0986df4c289bf5567e96"
[[package]]
name = "wasm-bindgen-test"
version = "0.3.42"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d9bf62a58e0780af3e852044583deee40983e5886da43a271dd772379987667b"
dependencies = [
"console_error_panic_hook",
"js-sys",
"scoped-tls",
"wasm-bindgen",
"wasm-bindgen-futures",
"wasm-bindgen-test-macro",
]
[[package]]
name = "wasm-bindgen-test-macro"
version = "0.3.42"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b7f89739351a2e03cb94beb799d47fb2cac01759b40ec441f7de39b00cbf7ef0"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "web-sys"
version = "0.3.69"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "77afa9a11836342370f4817622a2f0f418b134426d91a82dfb48f532d2ec13ef"
dependencies = [
"js-sys",
"wasm-bindgen",
]
[[package]]
name = "winapi"
version = "0.3.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
dependencies = [
"winapi-i686-pc-windows-gnu",
"winapi-x86_64-pc-windows-gnu",
]
[[package]]
name = "winapi-i686-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
[[package]]
name = "winapi-util"
version = "0.1.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4d4cc384e1e73b93bafa6fb4f1df8c41695c8a91cf9c4c64358067d15a7b6c6b"
dependencies = [
"windows-sys",
]
[[package]]
name = "winapi-x86_64-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
[[package]]
name = "windows-sys"
version = "0.52.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d"
dependencies = [
"windows-targets",
]
[[package]]
name = "windows-targets"
version = "0.52.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6f0713a46559409d202e70e28227288446bf7841d3211583a4b53e3f6d96e7eb"
dependencies = [
"windows_aarch64_gnullvm",
"windows_aarch64_msvc",
"windows_i686_gnu",
"windows_i686_gnullvm",
"windows_i686_msvc",
"windows_x86_64_gnu",
"windows_x86_64_gnullvm",
"windows_x86_64_msvc",
]
[[package]]
name = "windows_aarch64_gnullvm"
version = "0.52.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7088eed71e8b8dda258ecc8bac5fb1153c5cffaf2578fc8ff5d61e23578d3263"
[[package]]
name = "windows_aarch64_msvc"
version = "0.52.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9985fd1504e250c615ca5f281c3f7a6da76213ebd5ccc9561496568a2752afb6"
[[package]]
name = "windows_i686_gnu"
version = "0.52.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "88ba073cf16d5372720ec942a8ccbf61626074c6d4dd2e745299726ce8b89670"
[[package]]
name = "windows_i686_gnullvm"
version = "0.52.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "87f4261229030a858f36b459e748ae97545d6f1ec60e5e0d6a3d32e0dc232ee9"
[[package]]
name = "windows_i686_msvc"
version = "0.52.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "db3c2bf3d13d5b658be73463284eaf12830ac9a26a90c717b7f771dfe97487bf"
[[package]]
name = "windows_x86_64_gnu"
version = "0.52.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4e4246f76bdeff09eb48875a0fd3e2af6aada79d409d33011886d3e1581517d9"
[[package]]
name = "windows_x86_64_gnullvm"
version = "0.52.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "852298e482cd67c356ddd9570386e2862b5673c85bd5f88df9ab6802b334c596"
[[package]]
name = "windows_x86_64_msvc"
version = "0.52.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bec47e5bfd1bff0eeaf6d8b485cc1074891a197ab4225d504cb7a1ab88b02bf0"

7
Cargo.toml Normal file
View file

@ -0,0 +1,7 @@
[workspace]
members = [
"czv",
"czv-wasm",
"czv-python"
]
resolver = "2"

108
README.md Normal file
View file

@ -0,0 +1,108 @@
# czv
czv is a set of libraries for CSV content manipulation and analysis, with support for Rust, Python, and WebAssembly (JavaScript and TypeScript).
## Installation and examples
### Rust
```bash
cargo add czv
```
```rust
use czv::{
count::RowCount,
Result
};
fn main() -> Result<()> {
let data = "\
fruits,price
apple,2.50
banana,3.00
strawberry,1.50
";
let output = RowCount::new()
.file_data(data)
.execute()?;
println!("{output}"); // 3
Ok(())
}
```
### JavaScript/TypeScript (WebAssembly)
```bash
bun install czv
```
Or use `npm`, `pnpm`, or `yarn` instead of `bun`.
```js
import init, * as czv from "czv";
// Must run `await init()` or `initSync()` first for web use
await init();
const data = `fruits,price
apple,2.50
banana,3.00
strawberry,1.50`;
const output = czv.rowCount(data);
console.log(output);
```
### Python
```bash
pip install czv
```
```python
import czv
data = """fruits,price
apple,2.50
banana,3.00
strawberry,1.50"""
output = czv.row_count(data, False)
print(output)
```
## Available operations
| czv (Rust) | czv-wasm (JS/TS) | czv-python | Summary |
| ---------------------------------------- | -------------------------------------- | ----------------------------------------------- | ------------------------- |
| [`count::RowCount`](czv/src/count.rs) | [`rowCount`](czv-wasm/src/count.rs) | [`count.row_count`](czv-python/src/count.rs) | Get the number of rows |
| [`count::ColumnCount`](czv/src/count.rs) | [`columnCount`](czv-wasm/src/count.rs) | [`count.column_count`](czv-python/src/count.rs) | Get the number of columns |
## Development
Each package has its own `README.md` with more info for that particular package.
You may generate docs with:
```bash
cargo doc --no-deps --workspace --open
```
## Notes
czv is inspired by the command-line tools [xsv](https://github.com/BurntSushi/xsv) and [qsv](https://github.com/jqnatividad/qsv), but czv is not intended to cover all of their commands or features.
Not all provided libraries may be in sync at a given time. See the [available operations table](#available-operations) for a common operation list between libraries (not all implementations for a given operation may be in sync either, for example they may not have the same builder/function arguments).
Here are a few expected features for each provided operation:
- czv (Rust)
- Provide both a builder (recommended and common for conditional parameters) and a function
- Provide documentation (docstrings) in Markdown format
- czv-wasm (Web, JavaScript and TypeScript)
- Use camelCase for exported functions
- Include common browser support to run in-browser
- Provide documentation (docstrings) in TSDoc format
- czv-python
- Provide documentation (docstrings) and type hints for IDEs when developers are using the Python library (sourced from [czv-python/czv.pyi](czv-python/czv.pyi)) in Markdown format

72
czv-python/.gitignore vendored Normal file
View file

@ -0,0 +1,72 @@
/target
# Byte-compiled / optimized / DLL files
__pycache__/
.pytest_cache/
*.py[cod]
# C extensions
*.so
# Distribution / packaging
.Python
.venv/
env/
bin/
build/
develop-eggs/
dist/
eggs/
lib/
lib64/
parts/
sdist/
var/
include/
man/
venv/
*.egg-info/
.installed.cfg
*.egg
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
pip-selfcheck.json
# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.cache
nosetests.xml
coverage.xml
# Translations
*.mo
# Mr Developer
.mr.developer.cfg
.project
.pydevproject
# Rope
.ropeproject
# Django stuff:
*.log
*.pot
.DS_Store
# Sphinx documentation
docs/_build/
# PyCharm
.idea/
# VSCode
.vscode/
# Pyenv
.python-version

16
czv-python/Cargo.toml Normal file
View file

@ -0,0 +1,16 @@
[package]
name = "czv-python"
version = "0.0.0"
authors = ["Mueez Khan"]
description = "Python library for performing CSV-related functions for data engineering and analysis."
repository = "https://github.com/rzmk/czv"
edition = "2021"
[lib]
crate-type = ["cdylib", "rlib"]
[dependencies]
anyhow = "1.0.86"
csv = "1.3.0"
pyo3 = { version = "0.21.2", features = ["extension-module"] }
thiserror = "1.0.61"

50
czv-python/README.md Normal file
View file

@ -0,0 +1,50 @@
# czv-python
Python library for [czv](https://github.com/rzmk/czv). czv is a library of utility functions for CSV-related data engineering and analysis tasks.
## Installation and example
```bash
pip install czv
```
```python
import czv
data = """fruits,price
apple,2.50
banana,3.00
strawberry,1.50"""
output = czv.row_count(data, False)
print(output)
```
## Development
You'll need to have [maturin](https://github.com/PyO3/maturin/) and [uv](https://github.com/astral-sh/uv) installed. Set up a local virtual environment in the `czv-python` folder by running:
```bash
uv venv
```
Make sure to activate the virtual environment (instructions should be provided in your terminal after running the previous command).
Once you've activated the virtual environment, install dependencies by running:
```bash
uv pip install -r requirements.txt
```
### Build package in local environment
```bash
maturin develop --uv --release
```
### Run tests
```bash
pytest
```

46
czv-python/czv.pyi Normal file
View file

@ -0,0 +1,46 @@
"""
# czv
Python library for [czv](https://github.com/rzmk/czv). CSV content manipulation and analysis.
## Install
```bash
pip install czv
```
## Example
```python
from czv import row_count
data = \"""fruits,price
apple,2.50
banana,3.00
strawberry,1.50\"""
output = row_count(data, False)
print(output)
```
"""
from typing import Optional
def row_count(file_data: str, include_header_row: Optional[bool] = None) -> int:
    """Returns a count of the total number of rows.

    ## Arguments

    * `file_data` - CSV file data.
    * `include_header_row` - Specify whether to include the header row (first row) in the row count. May be omitted; omitting it or passing `None` behaves like `False`.
    """
def column_count(file_data: str) -> int:
    """Count the columns (fields) in the given CSV data.

    The count is the number of fields in the header (first) row.

    ## Arguments

    * `file_data` - CSV file data.
    """

View file

@ -0,0 +1,10 @@
import czv
# Small in-memory CSV: one header row followed by three records
# (no trailing newline).
csv_text = "\n".join(
    [
        "fruits,price",
        "apple,2.50",
        "banana,3.00",
        "strawberry,1.50",
    ]
)

# Passing False as the second argument leaves the header row out of the count.
row_total = czv.row_count(csv_text, False)
print(row_total)

15
czv-python/pyproject.toml Normal file
View file

@ -0,0 +1,15 @@
[build-system]
requires = ["maturin>=1.6,<2.0"]
build-backend = "maturin"
[project]
name = "czv"
requires-python = ">=3.8"
classifiers = [
"Programming Language :: Rust",
"Programming Language :: Python :: Implementation :: CPython",
"Programming Language :: Python :: Implementation :: PyPy",
]
dynamic = ["version"]
[tool.maturin]
features = ["pyo3/extension-module"]

View file

@ -0,0 +1,2 @@
maturin
pytest

33
czv-python/src/count.rs Normal file
View file

@ -0,0 +1,33 @@
use crate::Result;
use csv::ReaderBuilder;
use pyo3::pyfunction;
/// Counts the rows in the given CSV data.
///
/// ## Arguments
///
/// * `file_data` - CSV file data.
/// * `include_header_row` - Whether the header row (first row) is included in
///   the count. `None` is treated as `false`.
///
/// Every item produced by the record iterator is counted; individual records
/// are not inspected, so this function currently always returns `Ok`.
#[pyfunction]
pub fn row_count(file_data: String, include_header_row: Option<bool>) -> Result<usize> {
    // To count the header row, the reader has to treat the first row as
    // ordinary data, i.e. "has headers" is the inverse of the flag.
    let count_header = include_header_row.unwrap_or(false);
    let mut reader = ReaderBuilder::new()
        .has_headers(!count_header)
        .from_reader(file_data.as_bytes());
    let total = reader.records().count();
    Ok(total)
}
/// Returns a count of the total number of columns (fields).
///
/// ## Arguments
///
/// * `file_data` - CSV file data.
#[pyfunction]
pub fn column_count(file_data: Option<String>) -> Result<usize> {
let rdr = ReaderBuilder::new();
if let Some(file_data) = file_data {
return Ok(rdr.from_reader(file_data.as_bytes()).headers()?.len());
} else {
bail!("Could not determine a file path or file data for column_count_builder.");
}
}

43
czv-python/src/lib.rs Normal file
View file

@ -0,0 +1,43 @@
use pyo3::prelude::*;
// Error-handling helpers
/// Error type returned by the czv Python bindings.
///
/// Wraps an [`anyhow::Error`]; its `Display` output is the wrapped error's message.
#[derive(thiserror::Error, Debug)]
#[error("{0}")]
pub struct CzvError(anyhow::Error);
impl From<pyo3::PyErr> for CzvError {
fn from(value: pyo3::PyErr) -> Self {
value.into()
}
}
impl From<csv::Error> for CzvError {
fn from(value: csv::Error) -> Self {
value.into()
}
}
/// Converts a [`CzvError`] into a Python exception carrying the error's message.
impl From<CzvError> for pyo3::PyErr {
    fn from(value: CzvError) -> Self {
        // Construct the exception explicitly: `value.into()` would resolve back to
        // this very impl and recurse until the stack overflows.
        pyo3::exceptions::PyException::new_err(value.to_string())
    }
}
pub type Result<T> = anyhow::Result<T, CzvError>;
/// Returns early from the enclosing function with a [`CzvError`] built from the given message.
///
/// Accepts a single expression (optionally followed by a trailing comma) that is
/// passed to `anyhow::anyhow!`.
#[allow(unused_macros)]
macro_rules! bail {
    ($err:expr $(,)?) => {
        return Err(crate::CzvError(anyhow::anyhow!($err)))
    };
}
// Command imports
pub mod count;
/// Python module definition: registers the functions from [`count`] on the `czv` module.
#[pymodule]
fn czv(m: &Bound<'_, PyModule>) -> PyResult<()> {
    m.add_function(wrap_pyfunction!(count::row_count, m)?)?;
    m.add_function(wrap_pyfunction!(count::column_count, m)?)?;
    Ok(())
}

View file

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,4 @@
fruit,price
apple,2.50
banana,3.00
strawberry,1.50
1 fruit price
2 apple 2.50
3 banana 3.00
4 strawberry 1.50

View file

@ -0,0 +1,24 @@
import czv
import pytest
from .test_data import test_data
class TestCountFunc:
    """Tests for `czv.row_count` using the CSV fixtures listed in `test_data`."""

    @pytest.mark.parametrize(
        "file_name,expected",
        [("fruits.csv", 3), ("constituents_altnames.csv", 33971)],
    )
    def test_count(self, file_name, expected):
        """Count the total number of non-header rows."""
        # include_header_row is omitted here, so the header row is not counted.
        result = czv.row_count(test_data[file_name].read_text())
        assert result == expected

    @pytest.mark.parametrize(
        "file_name,expected",
        [("fruits.csv", 4), ("constituents_altnames.csv", 33972)],
    )
    def test_include_header_row(self, file_name, expected):
        """Count the total number of rows including the header row."""
        result = czv.row_count(test_data[file_name].read_text(), include_header_row=True)
        assert result == expected

View file

@ -0,0 +1,7 @@
import pathlib
# Absolute path of the `data` directory that sits next to this module.
data_path = pathlib.Path(__file__).parent.resolve() / "data"

# Maps each fixture file name to its path inside `data_path`.
test_data = dict(
    (fixture, data_path / fixture)
    for fixture in ("fruits.csv", "constituents_altnames.csv")
)

17
czv-wasm/Cargo.toml Normal file
View file

@ -0,0 +1,17 @@
[package]
name = "czv-wasm"
version = "0.0.0"
authors = ["Mueez Khan"]
description = "WASM library for performing CSV-related functions for data engineering and analysis."
repository = "https://github.com/rzmk/czv"
edition = "2021"
[lib]
crate-type = ["cdylib", "rlib"]
[dependencies]
anyhow = "1.0.86"
csv = "1.3.0"
thiserror = "1.0.61"
wasm-bindgen = { version = "0.2" }
wasm-bindgen-test = { version = "0.3.42" }

47
czv-wasm/README.md Normal file
View file

@ -0,0 +1,47 @@
# czv-wasm
WebAssembly (JavaScript and TypeScript) library for [czv](https://github.com/rzmk/czv). czv is a library of utility functions for CSV-related data engineering and analysis tasks.
## Installation and example
```bash
bun install czv
```
Or use `npm`, `pnpm`, or `yarn` instead of `bun`.
```js
import init, * as czv from "czv";
// Must run `await init()` or `initSync()` first for web use
await init();
const data = `fruits,price
apple,2.50
banana,3.00
strawberry,1.50`;
const output = czv.rowCount(data);
console.log(output);
```
## Development
You must have [wasm-pack](https://rustwasm.github.io/wasm-pack/installer/) installed. If you have Cargo installed you may run:
```bash
cargo install wasm-pack
```
### Build WASM for web
```bash
wasm-pack build --release --target web
```
### Test WASM for browser
```bash
wasm-pack test --firefox --release
```
You may replace `--firefox` with another browser flag such as `--chrome` or `--safari`.

View file

@ -0,0 +1,69 @@
<!DOCTYPE html>
<html id="html" lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<link rel="stylesheet" href="pico.min.css" />
<title>czv demo</title>
</head>
<body style="padding:1rem">
<div style="display: flex; gap: 2rem; justify-content: space-between;">
<h1>czv playground</h1>
<button class="outline secondary" style="height: fit-content; padding: 0.25rem;" onclick="const html = document.getElementById('html');
if (html.getAttribute('data-theme') === 'light')
html.setAttribute('data-theme', 'dark')
else
html.setAttribute('data-theme', 'light')">Switch
page theme</button>
</div>
<p>Import a CSV file and get statistical data from running <a href="https://github.com/rzmk/czv">czv</a> in your
browser using WASM.</p>
<input type="file" id="upload" accept=".csv" class="hidden" />
<label for="upload" style="display: none;" id="progress">Loading...</label>
<table style="width: 100%;" class="striped">
<thead>
<tr>
<th style="width: 10rem">Output type</th>
<th>Output</th>
</tr>
</thead>
<tbody>
<tr>
<td>Row count</td>
<td id="row-count"></td>
</tr>
<tr>
<td>Column count</td>
<td id="column-count"></td>
</tr>
</tbody>
</table>
<script type="module">
const input = document.getElementById("upload")
const progress = document.getElementById("progress")
import init, * as czv from "../../pkg/czv.js";
// Must run `await init()` or `initSync()` first for web use
await init();
const fileReader = new FileReader()
fileReader.onloadstart = () => {
progress.style.display = "block";
}
fileReader.onloadend = () => {
document.getElementById("column-count").innerText = czv.columnCount(fileReader.result)
document.getElementById("row-count").innerText = czv.rowCount(fileReader.result)
progress.style.display = "none";
}
input.addEventListener("change", () => {
fileReader.readAsText(input.files[0])
})
</script>
</body>
</html>

File diff suppressed because one or more lines are too long

32
czv-wasm/src/count.rs Normal file
View file

@ -0,0 +1,32 @@
use crate::Result;
use csv::ReaderBuilder;
use wasm_bindgen::prelude::*;
/// Counts the rows in the given CSV data.
///
/// @param {string} `file_data` CSV file data.
/// @param {boolean | undefined} `include_header_row` Whether the header row (first row) is part of the count. Default is false.
/// @returns {number}
#[wasm_bindgen(skip_jsdoc, js_name = rowCount)]
pub fn row_count(file_data: String, include_header_row: Option<bool>) -> Result<usize> {
    let count_header = include_header_row.unwrap_or(false);
    // The reader skips the first row when it is told the data has headers.
    let mut reader = ReaderBuilder::new()
        .has_headers(!count_header)
        .from_reader(file_data.as_bytes());
    let total = reader.records().count();
    Ok(total)
}
/// Counts the columns (fields) in the first row of the given CSV data.
///
/// ## Arguments
///
/// @param {string} `file_data` CSV file data.
#[wasm_bindgen(skip_jsdoc, js_name = columnCount)]
pub fn column_count(file_data: Option<String>) -> Result<usize> {
    match file_data {
        Some(csv_text) => {
            let mut reader = ReaderBuilder::new().from_reader(csv_text.as_bytes());
            let width = reader.headers()?.len();
            Ok(width)
        }
        None => bail!("Could not determine a file path or file data for column_count_builder."),
    }
}

30
czv-wasm/src/lib.rs Normal file
View file

@ -0,0 +1,30 @@
use wasm_bindgen::JsValue;
// Error-handling helpers
/// Error type returned by the czv WASM bindings.
///
/// Wraps an [`anyhow::Error`]; its `Display` output is the wrapped error's message.
#[derive(thiserror::Error, Debug)]
#[error("{0}")]
pub struct CzvError(anyhow::Error);
impl From<csv::Error> for CzvError {
fn from(value: csv::Error) -> Self {
value.into()
}
}
/// Converts a [`CzvError`] into a JavaScript string value holding the error message.
///
/// Implemented as `From` so that the matching `Into<JsValue>` is provided by the
/// standard blanket implementation.
impl From<CzvError> for JsValue {
    fn from(value: CzvError) -> Self {
        JsValue::from_str(&value.to_string())
    }
}
pub type Result<T> = anyhow::Result<T, CzvError>;
/// Returns early from the enclosing function with a [`CzvError`] built from the given message.
///
/// Accepts a single expression (optionally followed by a trailing comma) that is
/// passed to `anyhow::anyhow!`.
#[allow(unused_macros)]
macro_rules! bail {
    ($err:expr $(,)?) => {
        return Err(crate::CzvError(anyhow::anyhow!($err)))
    };
}
// Command imports
pub mod count;

19
czv-wasm/tests/count.rs Normal file
View file

@ -0,0 +1,19 @@
use czv_wasm;
use czv_wasm::Result;
use wasm_bindgen_test::*;
wasm_bindgen_test::wasm_bindgen_test_configure!(run_in_browser);
/// Counts the rows of a small inline CSV with the header row excluded.
///
/// Carries both the `#[test]` and `#[wasm_bindgen_test]` attributes.
#[test]
#[wasm_bindgen_test]
fn basic() -> Result<()> {
    let result = czv_wasm::count::row_count(
        "fruit,price
apple,2.00
banana,1.50
strawberry,3.00"
            .to_string(),
        Some(false),
    )?;
    // Three data rows remain once the header row is left out.
    assert_eq!(result, 3);
    Ok(())
}

1
czv-wasm/tests/tests.rs Normal file
View file

@ -0,0 +1 @@
mod count;

3
czv/.gitignore vendored Normal file
View file

@ -0,0 +1,3 @@
/target
/.venv
/pkg

22
czv/Cargo.toml Normal file
View file

@ -0,0 +1,22 @@
[package]
name = "czv"
version = "0.0.0"
authors = ["Mueez Khan"]
description = "Rust library for performing CSV-related operations for data engineering and analysis."
repository = "https://github.com/rzmk/czv"
edition = "2021"
license = "MIT OR Apache-2.0"
keywords = ["csv", "library"]
categories = ["text-processing"]
[dependencies]
anyhow = "1.0.86"
csv = "1.3.0"
typed-builder = "0.18.2"
[dev-dependencies]
criterion = { version = "0.4", features = ["html_reports"] }
[[bench]]
name = "benches"
harness = false

56
czv/README.md Normal file
View file

@ -0,0 +1,56 @@
# czv
Rust library for [czv](https://github.com/rzmk/czv). czv is a library of utility functions for CSV-related data engineering and analysis tasks.
## Usage
You must have [Rust](https://www.rust-lang.org/tools/install) and Cargo installed (Cargo may be additionally installed when you install Rust with `rustup`).
To add `czv` as a dependency of your Rust project, run:
```bash
cargo add czv
```
Let's say we want to print the total number of rows in the contents of a CSV file `fruits.csv` (4 rows by 2 columns), including the header row:
```rust
use czv::{count::RowCount, Result};
fn main() -> Result<()> {
let data = r#"fruits,price
apple,2.50
banana,3.00
strawberry,1.50"#;
let output = RowCount::new()
.file_data(data)
.include_header_row(true)
.execute()?;
println!("{output}"); // 4
Ok(())
}
```
When run, this should be the standard output printed in the terminal:
```console
4
```
## Tests
To run the tests, run:
```bash
cargo test
```
## Benchmarks
To run the benchmarks, run:
```bash
cargo bench
```
For benchmarks we use [criterion.rs](https://github.com/bheisler/criterion.rs).

View file

@ -0,0 +1,63 @@
use criterion::{black_box, criterion_group, Criterion};
use czv;
use czv::Result;
use std::path::PathBuf;
// macro_rules! get_test_files {
// ( $( $x:expr ),* ) => {
// {
// let test_dir = std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("tests/resources/");
// let mut files = Vec::new();
// $(
// files.push(test_dir.join($x));
// )*
// files
// }
// };
// }
/// Runs `row_count` on the file at `file_path` (header row excluded) and discards the count.
fn bench_row_count(file_path: PathBuf) -> Result<()> {
    czv::count::row_count(Some(file_path), None, false).map(|_| ())
}
fn bench_row_count_group(c: &mut Criterion) {
let mut group = c.benchmark_group("row_count");
group.bench_function("fruits.csv", |b| {
b.iter(|| bench_row_count(black_box("tests/resources/fruits.csv".into())))
});
group.bench_function("constituents_altnames.csv", |b| {
b.iter(|| {
bench_row_count(black_box(
"tests/resources/constituents_altnames.csv".into(),
))
})
});
group.finish();
}
/// Runs `column_count` on the file at `file_path` and discards the count.
fn bench_column_count(file_path: PathBuf) -> Result<()> {
    czv::count::column_count(Some(file_path), None).map(|_| ())
}
fn bench_column_count_group(c: &mut Criterion) {
let mut group = c.benchmark_group("column_count");
group.bench_function("fruits.csv", |b| {
b.iter(|| bench_column_count(black_box("tests/resources/fruits.csv".into())))
});
group.bench_function("constituents_altnames.csv", |b| {
b.iter(|| {
bench_column_count(black_box(
"tests/resources/constituents_altnames.csv".into(),
))
})
});
group.finish();
}
// Collects the row count and column count benchmark groups under `count_benches`.
criterion_group!(
    count_benches,
    bench_row_count_group,
    bench_column_count_group
);

7
czv/benches/benches.rs Normal file
View file

@ -0,0 +1,7 @@
// Import benchmarks
mod bench_count;
use bench_count::count_benches;
// Run all benchmarks
use criterion::criterion_main;
criterion_main!(count_benches);

View file

@ -0,0 +1,13 @@
use czv::{count::RowCount, Result};
/// Prints the number of rows in a small inline CSV using the `RowCount` builder.
fn main() -> Result<()> {
    // The data lines stay at column 0 so no leading whitespace ends up in the CSV.
    let data = "\
fruits,price
apple,2.50
banana,3.00
strawberry,1.50
";
    // `include_header_row` is not set, so it keeps its default of false.
    let output = RowCount::new().file_data(data).execute()?;
    println!("{output}"); // 3
    Ok(())
}

188
czv/src/count.rs Normal file
View file

@ -0,0 +1,188 @@
use crate::{bail, Result};
use csv::ReaderBuilder;
use std::path::PathBuf;
use typed_builder::TypedBuilder;
/// Counts the rows of a CSV file or of in-memory CSV data. Recommended alternative: [`czv::count::RowCount`](crate::count::RowCount).
///
/// See [`czv::count::RowCount`](crate::count::RowCount) for a builder version (recommended) of this function.
///
/// ## Notes
///
/// - `include_header_row` controls whether the header row is part of the count.
/// - Provide either `file_path` or `file_data`. If both are given, `file_path` is read and `file_data` is ignored.
/// - An error is returned when neither `file_path` nor `file_data` is provided.
///
/// ## Arguments
///
/// * `file_path` - CSV file path (alternative to `file_data`).
/// * `file_data` - CSV file data (alternative to `file_path`).
/// * `include_header_row` - Specify whether to include the header row (first row) in the row count.
pub fn row_count(
    file_path: Option<PathBuf>,
    file_data: Option<String>,
    include_header_row: bool,
) -> Result<usize> {
    let mut builder = ReaderBuilder::new();
    // The reader skips the first row when it is told the data has headers.
    builder.has_headers(!include_header_row);
    match (file_path, file_data) {
        // A file path wins whenever one is present.
        (Some(path), _) => {
            let mut reader = builder.from_path(path)?;
            Ok(reader.records().count())
        }
        (None, Some(data)) => {
            let mut reader = builder.from_reader(data.as_bytes());
            Ok(reader.records().count())
        }
        (None, None) => {
            bail!("Could not determine a file path or file data for row_count_builder.")
        }
    }
}
#[derive(TypedBuilder)]
#[builder(doc, builder_method(name=new, doc="Returns a `RowCountBuilder` to customize row count options by running other methods before getting the row count with the `execute` method."), build_method(vis="", name=__build))]
/// The entry point for setting up a [`czv::count::RowCountBuilder`](crate::count::RowCountBuilder) by running [`RowCount::new()`](crate::count::RowCount::new).
///
/// The generated build method is private and named `__build`; use `execute` on the
/// builder to obtain the row count.
///
/// For example:
///
/// ```rust
/// use czv::{count::RowCount, Result};
///
/// fn main() -> Result<()> {
///     let data = "\
/// fruits,price
/// apple,2.50
/// banana,3.00
/// strawberry,1.50
/// ";
///     let output = RowCount::new().file_data(data).execute()?;
///     println!("{output}"); // 3
///     Ok(())
/// }
/// ```
pub struct RowCount {
    // Path of the CSV file to read; optional, accepts anything convertible into a `PathBuf`.
    #[builder(
        default,
        setter(
            doc = "CSV file path (alternative to `file_data`).",
            strip_option,
            into
        )
    )]
    file_path: Option<PathBuf>,
    // In-memory CSV content; optional, accepts anything convertible into a `String`.
    #[builder(
        default,
        setter(
            doc = "CSV file data (alternative to `file_path`).",
            strip_option,
            into
        )
    )]
    file_data: Option<String>,
    // Whether the header row is part of the count.
    #[builder(
        default = false,
        setter(
            doc = "Specify whether to include the header row (first row) in the row count. Defaults to false."
        )
    )]
    include_header_row: bool,
}
// The generic parameters stand for the per-field builder state; the
// `typed_builder::Optional` bounds let `execute` be called whether or not each
// optional field has been set.
#[allow(non_camel_case_types)]
impl<
        __include_header_row: typed_builder::Optional<bool>,
        __file_data: typed_builder::Optional<Option<String>>,
        __file_path: typed_builder::Optional<Option<PathBuf>>,
    > RowCountBuilder<(__file_path, __file_data, __include_header_row)>
{
    /// Returns the row count.
    ///
    /// Builds the [`RowCount`] options and passes them to [`row_count`].
    pub fn execute(self) -> Result<usize> {
        let builder = self.__build();
        row_count(
            builder.file_path,
            builder.file_data,
            builder.include_header_row,
        )
    }
}
/// Counts the columns (fields) in the first row of a CSV file or of in-memory CSV data. Recommended alternative: [`czv::count::ColumnCount`](crate::count::ColumnCount).
///
/// See [`czv::count::ColumnCount`](crate::count::ColumnCount) for a builder version (recommended) of this function.
///
/// If both `file_path` and `file_data` are given, `file_path` is read and `file_data` is ignored.
/// An error is returned when neither is provided.
///
/// ## Arguments
///
/// * `file_path` - CSV file path (alternative to `file_data`).
/// * `file_data` - CSV file data (alternative to `file_path`).
pub fn column_count(file_path: Option<PathBuf>, file_data: Option<String>) -> Result<usize> {
    let builder = ReaderBuilder::new();
    match (file_path, file_data) {
        // A file path wins whenever one is present.
        (Some(path), _) => {
            let mut reader = builder.from_path(path)?;
            let width = reader.headers()?.len();
            Ok(width)
        }
        (None, Some(data)) => {
            let mut reader = builder.from_reader(data.as_bytes());
            let width = reader.headers()?.len();
            Ok(width)
        }
        (None, None) => {
            bail!("Could not determine a file path or file data for column_count_builder.")
        }
    }
}
#[derive(TypedBuilder)]
#[builder(doc, builder_method(name=new, doc="Returns a `ColumnCountBuilder` to customize column count options by running other methods before getting the column count with the `execute` method."), build_method(vis="", name=__build))]
/// The entry point for setting up a [`czv::count::ColumnCountBuilder`](crate::count::ColumnCountBuilder) by running [`ColumnCount::new()`](crate::count::ColumnCount::new).
///
/// For example:
///
/// ```rust
/// use czv::{count::ColumnCount, Result};
///
/// fn main() -> Result<()> {
///     let data = r#"fruits,price
/// apple,2.50
/// banana,3.00
/// strawberry,1.50"#;
///     let output = ColumnCount::new().file_data(data).execute()?;
///     println!("{output}"); // 2
///     Ok(())
/// }
/// ```
pub struct ColumnCount {
    #[builder(
        default,
        setter(
            doc = "CSV file path (alternative to `file_data`).",
            strip_option,
            into
        )
    )]
    file_path: Option<PathBuf>,
    // The setter doc previously described this field as a file path.
    #[builder(
        default,
        setter(
            doc = "CSV file data (alternative to `file_path`).",
            strip_option,
            into
        )
    )]
    file_data: Option<String>,
}
// The generic parameters stand for the per-field builder state; the
// `typed_builder::Optional` bounds let `execute` be called whether or not each
// optional field has been set.
#[allow(non_camel_case_types)]
impl<
        __file_data: typed_builder::Optional<Option<String>>,
        __file_path: typed_builder::Optional<Option<PathBuf>>,
    > ColumnCountBuilder<(__file_path, __file_data)>
{
    /// Returns the column count.
    ///
    /// Builds the [`ColumnCount`] options and passes them to [`column_count`].
    pub fn execute(self) -> Result<usize> {
        let builder = self.__build();
        column_count(builder.file_path, builder.file_data)
    }
}

58
czv/src/lib.rs Normal file
View file

@ -0,0 +1,58 @@
//! # czv
//!
//! CSV operations library for data engineering/analysis tasks.
//!
//! ## Example
//!
//! ```rust
//! use czv::{count::RowCount, Result};
//!
//! fn main() -> Result<()> {
//! let data = "\
//! fruits,price
//! apple,2.50
//! banana,3.00
//! strawberry,1.50
//! ";
//! let output = RowCount::new().file_data(data).execute()?;
//! println!("{output}"); // 3
//! Ok(())
//! }
//! ```
//!
//! ## Links
//!
//! - Source code: <https://github.com/rzmk/czv>
//! - czv-wasm: <https://github.com/rzmk/czv>
//! - czv-python: <https://github.com/rzmk/czv>
/// Counting operations including row count and column count.
pub mod count;
/// Extract a section of rows.
pub mod slice;
/// Error type used by czv functions; wraps an [`anyhow::Error`].
#[allow(dead_code)]
#[derive(Debug)]
pub struct CzvError(anyhow::Error);
impl From<anyhow::Error> for CzvError {
fn from(value: anyhow::Error) -> Self {
value.into()
}
}
impl From<csv::Error> for CzvError {
fn from(value: csv::Error) -> Self {
value.into()
}
}
pub type Result<T, E = CzvError> = anyhow::Result<T, E>;
/// Returns early from the enclosing function with a [`CzvError`] built from the given message.
///
/// Accepts a single expression (optionally followed by a trailing comma) that is
/// passed to `anyhow::anyhow!`.
#[macro_export]
#[allow(unused_macros)]
macro_rules! bail {
    ($err:expr $(,)?) => {
        // `$crate` always names the crate that defines this macro, whereas a plain
        // `crate` would name whichever crate the macro is expanded in.
        return Err($crate::CzvError(anyhow::anyhow!($err)))
    };
}

281
czv/src/slice.rs Normal file
View file

@ -0,0 +1,281 @@
use crate::{bail, Result};
use csv::ReaderBuilder;
use std::path::PathBuf;
use typed_builder::TypedBuilder;
/// Returns a slice of rows from the CSV data.
///
/// See [`czv::slice::Slice`](crate::slice::Slice) for a builder version (recommended) of this function.
///
/// ## Example
///
/// We have a file `fruits.csv` at the path `tests/resources/fruits.csv`:
///
/// ```csv
/// fruit,price
/// apple,2.50
/// banana,3.00
/// strawberry,1.50
/// ```
///
/// Let's print the first two non-header rows in the CSV file:
///
/// ```rust
/// use czv::{
/// Result,
/// slice::Slice
/// };
///
/// fn main() -> Result<()> {
/// let file_path = "tests/resources/fruits.csv";
///
/// let output: String = Slice::new()
/// .file_path(file_path)
/// .start(0)
/// .end(2) // exclusive
/// .include_header_row(false)
/// .execute()?;
///
/// println!("{output}");
///
/// Ok(())
/// }
/// ```
///
/// Printed to the terminal we get:
///
/// ```console
/// apple,2.50
/// banana,3.00
/// ```
///
/// ## Notes
///
/// - You may not use `file_path` and `file_data` together, only one may be specified.
/// - You may not use `end` and `length` together, only one may be specified.
/// - You may not use `index` with any of `start`, `end`, or `length`.
///
/// ## Arguments
///
/// * `file_path` - CSV file path (alternative to `file_data`).
/// * `file_data` - CSV file data (alternative to `file_path`).
/// * `start` - The index of the record to slice from (0-indexed).
/// If negative, starts from the last record.
/// * `end` - The index of the record to slice to.
/// * `length` - The length of the slice (alternative to `end`).
/// * `index` - Slice a single record. If negative, starts from the last record.
/// * `include_header_row` - Specify whether to include the header row (first row) in the records.
pub fn slice(
file_path: Option<PathBuf>,
file_data: Option<String>,
start: Option<i32>,
end: Option<i32>,
length: Option<i32>,
index: Option<i32>,
include_header_row: bool,
) -> Result<String> {
match (file_path, file_data) {
(Some(_), Some(_)) => bail!("Cannot have both file_path and file_data, specify one only."),
(None, None) => bail!("Must provide either file_path or file_data."),
(Some(file_path), None) => {
let mut rdr = ReaderBuilder::new()
.has_headers(include_header_row)
.from_path(file_path)?;
if !include_header_row {
rdr.byte_headers()?;
}
let original_pos = rdr.position().to_owned();
let records_count = rdr.records().count();
if index.is_some() && (start.is_some() || end.is_some() || length.is_some()) {
bail!("Cannot use index with start, end, or length.")
}
if end.is_some() && length.is_some() {
bail!("Cannot use end with length.")
}
let start_line = match start {
Some(x) => {
if x > records_count as i32 {
bail!("start value {x} cannot be greater than the number of records.")
} else if x >= 0 {
Some(x as usize)
} else {
Some((records_count as i32 + x) as usize)
}
}
None => None,
};
let end_line = match end {
Some(x) => {
if x >= records_count as i32 {
bail!("end value {x} cannot be greater than or equal to the number of records.")
} else if x >= 0 {
Some(x as usize)
} else {
Some((records_count as i32 + x) as usize)
}
}
None => None,
};
rdr.seek(original_pos)?;
let mut res_vec: Vec<String> = vec![];
if let Some(idx) = index {
return Ok(rdr
.records()
.skip(idx as usize)
.next()
.unwrap()?
.iter()
.map(|x| x.to_string())
.collect::<Vec<_>>()
.join(","));
}
for r in rdr
.records()
.skip(start_line.unwrap())
.take(end_line.unwrap() - start_line.unwrap())
{
let record = r?;
res_vec.push(
record
.iter()
.map(|x| x.to_string())
.collect::<Vec<_>>()
.join(","),
);
}
Ok(res_vec
.iter()
.map(|x| x.to_string())
.collect::<Vec<_>>()
.join("\n"))
}
(None, Some(file_data)) => {
let mut rdr = ReaderBuilder::new()
.has_headers(include_header_row)
.from_reader(std::io::Cursor::new(file_data.as_str()));
if !include_header_row {
rdr.byte_headers()?;
}
let original_pos = rdr.position().to_owned();
let records_count = rdr.records().count();
if index.is_some() && (start.is_some() || end.is_some() || length.is_some()) {
bail!("Cannot use index with start, end, or length.")
}
if end.is_some() && length.is_some() {
bail!("Cannot use end with length.")
}
let start_line = match start {
Some(x) => {
if x > records_count as i32 {
bail!("start value {x} cannot be greater than the number of records.")
} else if x >= 0 {
Some(x as usize)
} else {
Some((records_count as i32 + x) as usize)
}
}
None => None,
};
let end_line = match end {
Some(x) => {
if x >= records_count as i32 {
bail!("end value {x} cannot be greater than or equal to the number of records.")
} else if x >= 0 {
Some(x as usize)
} else {
Some((records_count as i32 + x) as usize)
}
}
None => None,
};
rdr.seek(original_pos)?;
let mut res_vec: Vec<String> = vec![];
for r in rdr
.records()
.skip(start_line.unwrap())
.take(end_line.unwrap() - start_line.unwrap())
{
let record = r?;
res_vec.push(
record
.iter()
.map(|x| x.to_string())
.collect::<Vec<_>>()
.join(","),
);
}
Ok(res_vec
.iter()
.map(|x| x.to_string())
.collect::<Vec<_>>()
.join("\n"))
}
}
}
// Builder options for `slice`. The builder is created with `Slice::new()`; the
// generated build method is private and named `__build`.
#[derive(TypedBuilder)]
#[builder(builder_method(name=new), build_method(vis="", name=__build))]
pub struct Slice {
    // CSV file path (alternative to `file_data`).
    #[builder(default, setter(strip_option, into))]
    file_path: Option<PathBuf>,
    // CSV file data (alternative to `file_path`).
    #[builder(default, setter(strip_option, into))]
    file_data: Option<String>,
    // Index of the record to slice from.
    #[builder(default, setter(strip_option))]
    start: Option<i32>,
    // Index of the record to slice to.
    #[builder(default, setter(strip_option))]
    end: Option<i32>,
    // Length of the slice (alternative to `end`).
    #[builder(default, setter(strip_option))]
    length: Option<i32>,
    // Index of a single record to return.
    #[builder(default, setter(strip_option))]
    index: Option<i32>,
    // Whether the header row is included in the records; defaults to false.
    #[builder(default = false)]
    include_header_row: bool,
}
// The generic parameters stand for the per-field builder state; the
// `typed_builder::Optional` bounds let `execute` be called whether or not each
// optional field has been set.
#[allow(non_camel_case_types)]
impl<
        __include_header_row: typed_builder::Optional<bool>,
        __index: typed_builder::Optional<Option<i32>>,
        __length: typed_builder::Optional<Option<i32>>,
        __end: typed_builder::Optional<Option<i32>>,
        __start: typed_builder::Optional<Option<i32>>,
        __file_data: typed_builder::Optional<Option<String>>,
        __file_path: typed_builder::Optional<Option<PathBuf>>,
    >
    SliceBuilder<(
        __file_path,
        __file_data,
        __start,
        __end,
        __length,
        __index,
        __include_header_row,
    )>
{
    /// Returns the selected rows as a string.
    ///
    /// Builds the [`Slice`] options and passes them to [`slice`].
    pub fn execute(self) -> Result<String> {
        let builder = self.__build();
        slice(
            builder.file_path,
            builder.file_data,
            builder.start,
            builder.end,
            builder.length,
            builder.index,
            builder.include_header_row,
        )
    }
}

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,4 @@
fruit,price
apple,2.50
banana,3.00
strawberry,1.50
1 fruit price
2 apple 2.50
3 banana 3.00
4 strawberry 1.50

56
czv/tests/test_count.rs Normal file
View file

@ -0,0 +1,56 @@
use czv;
use czv::Result;
/// `row_count` on a file path leaves the header row out when `include_header_row` is false.
#[test]
fn test_row_count() -> Result<()> {
    let fixtures = [
        ("tests/resources/fruits.csv", 3),
        ("tests/resources/constituents_altnames.csv", 33971),
    ];
    for (path, want) in fixtures {
        let actual = czv::count::row_count(Some(path.into()), None, false)?;
        assert_eq!(want, actual);
    }
    Ok(())
}
/// The `RowCount` builder with only a file path set excludes the header row.
#[test]
fn test_row_count_builder() -> Result<()> {
    let fixtures = [
        ("tests/resources/fruits.csv", 3),
        ("tests/resources/constituents_altnames.csv", 33971),
    ];
    for (path, want) in fixtures {
        let counter = czv::count::RowCount::new().file_path(path);
        let actual = counter.execute()?;
        assert_eq!(want, actual);
    }
    Ok(())
}
/// `column_count` on a file path reports the number of fields for each fixture.
#[test]
fn test_column_count() -> Result<()> {
    let fixtures = [
        ("tests/resources/fruits.csv", 2),
        ("tests/resources/constituents_altnames.csv", 6),
    ];
    for (path, want) in fixtures {
        let actual = czv::count::column_count(Some(path.into()), None)?;
        assert_eq!(want, actual);
    }
    Ok(())
}
/// The `ColumnCount` builder with a file path reports the number of fields for each fixture.
#[test]
fn test_column_count_builder() -> Result<()> {
    let fixtures = [
        ("tests/resources/fruits.csv", 2),
        ("tests/resources/constituents_altnames.csv", 6),
    ];
    for (path, want) in fixtures {
        let counter = czv::count::ColumnCount::new().file_path(path);
        let actual = counter.execute()?;
        assert_eq!(want, actual);
    }
    Ok(())
}

75
czv/tests/test_slice.rs Normal file
View file

@ -0,0 +1,75 @@
use czv;
use czv::Result;
/// Slicing a file with `start` and `end` returns the records between them (header excluded).
#[test]
fn test_slice_start_end() -> Result<()> {
    let fixtures = [(
        "tests/resources/fruits.csv",
        1,
        3,
        "banana,3.00\nstrawberry,1.50".to_string(),
    )];
    for (path, from, to, want) in fixtures {
        let actual = czv::slice::slice(
            Some(path.into()),
            None,
            Some(from),
            Some(to),
            None,
            None,
            false,
        )?;
        assert_eq!(want, actual);
    }
    Ok(())
}
/// Slicing in-memory CSV data with `start` and `end` returns the records between them (header excluded).
#[test]
fn test_slice_start_end_data() -> Result<()> {
    let fixtures = [(
        "fruit,price\napple,2.50\nbanana,3.00\nstrawberry,1.50".to_string(),
        1,
        3,
        "banana,3.00\nstrawberry,1.50".to_string(),
    )];
    for (csv_text, from, to, want) in fixtures {
        let actual = czv::slice::slice(
            None,
            Some(csv_text),
            Some(from),
            Some(to),
            None,
            None,
            false,
        )?;
        assert_eq!(want, actual);
    }
    Ok(())
}
/// The `Slice` builder with `start(0)` and an exclusive `end(2)` returns the first two records.
#[test]
fn test_slice_start_0_end_3() -> Result<()> {
    let want = "apple,2.50\nbanana,3.00".to_string();
    let request = czv::slice::Slice::new()
        .file_path("tests/resources/fruits.csv")
        .start(0)
        .end(2) // exclusive
        .include_header_row(false);
    let actual: String = request.execute()?;
    assert_eq!(want, actual);
    Ok(())
}
/// The `Slice` builder with `index(2)` returns the third non-header record.
#[test]
fn test_slice_index_2() -> Result<()> {
    let want = "strawberry,1.50".to_string();
    let request = czv::slice::Slice::new()
        .file_path("tests/resources/fruits.csv")
        .index(2)
        .include_header_row(false);
    let actual: String = request.execute()?;
    assert_eq!(want, actual);
    Ok(())
}