mirror of
https://github.com/rzmk/czv.git
synced 2025-12-19 00:09:24 +00:00
refactor: use options object for WASM function args, improve docs
- Add relevant links to all READMEs and source code - Resolve clippy lints czv: - Add more docs for top-level items - Add suggestion to use builder methods instead of functions - Disable slice and slice tests until operation is complete czv-wasm: - Use tsify_next for allowing objects as parameters - Add nodejs example and instructions
This commit is contained in:
parent
ce260e9491
commit
e84c5bec8b
20 changed files with 564 additions and 168 deletions
63
Cargo.lock
generated
63
Cargo.lock
generated
|
|
@ -244,7 +244,10 @@ version = "0.0.0"
|
|||
dependencies = [
|
||||
"anyhow",
|
||||
"csv",
|
||||
"serde",
|
||||
"serde-wasm-bindgen",
|
||||
"thiserror",
|
||||
"tsify-next",
|
||||
"wasm-bindgen",
|
||||
"wasm-bindgen-test",
|
||||
]
|
||||
|
|
@ -255,6 +258,19 @@ version = "1.12.0"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3dca9240753cf90908d7e4aac30f630662b02aebaa1b58a3cadabdb23385b58b"
|
||||
|
||||
[[package]]
|
||||
name = "gloo-utils"
|
||||
version = "0.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0b5555354113b18c547c1d3a98fbf7fb32a9ff4f6fa112ce823a21641a0ba3aa"
|
||||
dependencies = [
|
||||
"js-sys",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"wasm-bindgen",
|
||||
"web-sys",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "half"
|
||||
version = "2.4.1"
|
||||
|
|
@ -628,6 +644,17 @@ dependencies = [
|
|||
"serde_derive",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "serde-wasm-bindgen"
|
||||
version = "0.6.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8302e169f0eddcc139c70f139d19d6467353af16f9fce27e8c30158036a1e16b"
|
||||
dependencies = [
|
||||
"js-sys",
|
||||
"serde",
|
||||
"wasm-bindgen",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "serde_derive"
|
||||
version = "1.0.203"
|
||||
|
|
@ -639,6 +666,17 @@ dependencies = [
|
|||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "serde_derive_internals"
|
||||
version = "0.29.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "18d26a20a969b9e3fdf2fc2d9f21eda6c40e2de84c9408bb5d3b05d499aae711"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "serde_json"
|
||||
version = "1.0.117"
|
||||
|
|
@ -709,6 +747,31 @@ dependencies = [
|
|||
"serde_json",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tsify-next"
|
||||
version = "0.5.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0407c98efe14c83fd025675246bf855f0ed750b6b57fa6ab839ade598ceb5374"
|
||||
dependencies = [
|
||||
"gloo-utils",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"tsify-next-macros",
|
||||
"wasm-bindgen",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tsify-next-macros"
|
||||
version = "0.5.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "615b1b80acc6720d554777d55ed4ed35a5ceb0e0df7ff7cf9d3946ecbf74953e"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"serde_derive_internals",
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "typed-builder"
|
||||
version = "0.18.2"
|
||||
|
|
|
|||
23
README.md
23
README.md
|
|
@ -2,8 +2,17 @@
|
|||
|
||||
czv is a set of CSV content manipulation and analysis libraries with support for Rust, Python, and WebAssembly (JavaScript and TypeScript).
|
||||
|
||||
## Links
|
||||
|
||||
- czv GitHub repository: <https://github.com/rzmk/czv>
|
||||
- Rust: [crates.io/crates/czv](https://crates.io/crates/czv) ([source code](https://github.com/rzmk/czv/tree/main/czv))
|
||||
- WebAssembly (JavaScript/TypeScript): [npmjs.com/package/czv](https://www.npmjs.com/package/czv) ([source code](https://github.com/rzmk/czv/tree/main/czv-wasm))
|
||||
- Python: [pypi.org/project/czv](https://pypi.org/project/czv/) ([source code](https://github.com/rzmk/czv/tree/main/czv-python))
|
||||
|
||||
## Installation and examples
|
||||
|
||||
In the following examples we'll get the total number of rows in the CSV data including the header row.
|
||||
|
||||
### Rust
|
||||
|
||||
```bash
|
||||
|
|
@ -11,10 +20,7 @@ cargo install czv
|
|||
```
|
||||
|
||||
```rust
|
||||
use czv::{
|
||||
count::RowCount,
|
||||
Result
|
||||
};
|
||||
use czv::{RowCount, Result};
|
||||
|
||||
fn main() -> Result<()> {
|
||||
let data = "\
|
||||
|
|
@ -25,8 +31,9 @@ strawberry,1.50
|
|||
";
|
||||
let output = RowCount::new()
|
||||
.file_data(data)
|
||||
.include_header_row(true)
|
||||
.execute()?;
|
||||
println!("{output}"); // 3
|
||||
println!("{output}"); // 4
|
||||
Ok(())
|
||||
}
|
||||
```
|
||||
|
|
@ -49,7 +56,11 @@ apple,2.50
|
|||
banana,3.00
|
||||
strawberry,1.50`;
|
||||
|
||||
const output = czv.rowCount(data);
|
||||
const output = czv.rowCount({
|
||||
file_data: data,
|
||||
include_header_row: true,
|
||||
});
|
||||
|
||||
console.log(output);
|
||||
```
|
||||
|
||||
|
|
|
|||
|
|
@ -2,8 +2,12 @@
|
|||
|
||||
Python library for [czv](https://github.com/rzmk/czv). czv is a library of CSV-related operations for data engineering and analysis tasks.
|
||||
|
||||
- For a Rust library see [czv](https://github.com/rzmk/czv/tree/main/czv).
|
||||
- For a WebAssembly (JavaScript, TypeScript) library see [czv-wasm](https://github.com/rzmk/czv/tree/main/czv-wasm).
|
||||
## Links
|
||||
|
||||
- czv GitHub repository: <https://github.com/rzmk/czv>
|
||||
- Rust: [crates.io/crates/czv](https://crates.io/crates/czv) ([source code](https://github.com/rzmk/czv/tree/main/czv))
|
||||
- WebAssembly (JavaScript/TypeScript): [npmjs.com/package/czv](https://www.npmjs.com/package/czv) ([source code](https://github.com/rzmk/czv/tree/main/czv-wasm))
|
||||
- Python: [pypi.org/project/czv](https://pypi.org/project/czv/) ([source code](https://github.com/rzmk/czv/tree/main/czv-python))
|
||||
|
||||
## Installation and example
|
||||
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
"""
|
||||
# czv
|
||||
|
||||
Python library for [czv](https://github.com/rzmk/czv). CSV content manipulation and analysis.
|
||||
Python library for [czv](https://github.com/rzmk/czv). CSV operations library for data engineering/analysis tasks.
|
||||
|
||||
## Install
|
||||
|
||||
|
|
@ -19,6 +19,13 @@ output = row_count(file_path="fruits.csv")
|
|||
print(output)
|
||||
```
|
||||
|
||||
## Links
|
||||
|
||||
- czv GitHub repository: <https://github.com/rzmk/czv>
|
||||
- Rust: [crates.io/crates/czv](https://crates.io/crates/czv) ([source code](https://github.com/rzmk/czv/tree/main/czv))
|
||||
- WebAssembly (JavaScript/TypeScript): [npmjs.com/package/czv](https://www.npmjs.com/package/czv) ([source code](https://github.com/rzmk/czv/tree/main/czv-wasm))
|
||||
- Python: [pypi.org/project/czv](https://pypi.org/project/czv/) ([source code](https://github.com/rzmk/czv/tree/main/czv-python))
|
||||
|
||||
"""
|
||||
|
||||
from typing import Optional
|
||||
|
|
|
|||
|
|
@ -12,6 +12,9 @@ crate-type = ["cdylib", "rlib"]
|
|||
[dependencies]
|
||||
anyhow = "1.0.86"
|
||||
csv = "1.3.0"
|
||||
serde = { version = "1.0.203", features = ["derive"] }
|
||||
serde-wasm-bindgen = "0.6.5"
|
||||
thiserror = "1.0.61"
|
||||
tsify-next = "0.5.3"
|
||||
wasm-bindgen = { version = "0.2" }
|
||||
wasm-bindgen-test = { version = "0.3.42" }
|
||||
|
|
|
|||
|
|
@ -2,8 +2,12 @@
|
|||
|
||||
WebAssembly (JavaScript and TypeScript) library for [czv](https://github.com/rzmk/czv). czv is a library of CSV-related operations for data engineering and analysis tasks.
|
||||
|
||||
- For a Rust library see [czv](https://github.com/rzmk/czv/tree/main/czv).
|
||||
- For a Python library see [czv-python](https://github.com/rzmk/czv/tree/main/czv-python).
|
||||
## Links
|
||||
|
||||
- czv GitHub repository: <https://github.com/rzmk/czv>
|
||||
- Rust: [crates.io/crates/czv](https://crates.io/crates/czv) ([source code](https://github.com/rzmk/czv/tree/main/czv))
|
||||
- WebAssembly (JavaScript/TypeScript): [npmjs.com/package/czv](https://www.npmjs.com/package/czv) ([source code](https://github.com/rzmk/czv/tree/main/czv-wasm))
|
||||
- Python: [pypi.org/project/czv](https://pypi.org/project/czv/) ([source code](https://github.com/rzmk/czv/tree/main/czv-python))
|
||||
|
||||
## Installation and example
|
||||
|
||||
|
|
@ -23,7 +27,11 @@ apple,2.50
|
|||
banana,3.00
|
||||
strawberry,1.50`;
|
||||
|
||||
const output = czv.rowCount(data);
|
||||
const output = czv.rowCount({
|
||||
file_data: data,
|
||||
include_header_row: true,
|
||||
});
|
||||
|
||||
console.log(output);
|
||||
```
|
||||
|
||||
|
|
@ -38,9 +46,11 @@ cargo install wasm-pack
|
|||
### Build WASM for web
|
||||
|
||||
```bash
|
||||
wasm-pack build --release --target web
|
||||
wasm-pack build --release --target web --out-name czv
|
||||
```
|
||||
|
||||
**Note**: Currently you must edit `pkg/package.json` after building and change the package name from `czv-wasm` to `czv`.
|
||||
|
||||
### Test WASM for browser
|
||||
|
||||
```bash
|
||||
|
|
|
|||
1
czv-wasm/examples/basic-demo/README.md
Normal file
1
czv-wasm/examples/basic-demo/README.md
Normal file
|
|
@ -0,0 +1 @@
|
|||
If you modify the TypeScript code, save your changes and then run `tsc script.ts --module es2022 --target es2022` to regenerate a compatible `script.js` file.
|
||||
|
|
@ -40,30 +40,7 @@
|
|||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
<script type="module">
|
||||
const input = document.getElementById("upload")
|
||||
const progress = document.getElementById("progress")
|
||||
|
||||
import init, * as czv from "../../pkg/czv.js";
|
||||
// Must run `await init()` or `initSync()` first for web use
|
||||
await init();
|
||||
|
||||
const fileReader = new FileReader()
|
||||
|
||||
fileReader.onloadstart = () => {
|
||||
progress.style.display = "block";
|
||||
}
|
||||
|
||||
fileReader.onloadend = () => {
|
||||
document.getElementById("column-count").innerText = czv.columnCount(fileReader.result)
|
||||
document.getElementById("row-count").innerText = czv.rowCount(fileReader.result)
|
||||
progress.style.display = "none";
|
||||
}
|
||||
|
||||
input.addEventListener("change", () => {
|
||||
fileReader.readAsText(input.files[0])
|
||||
})
|
||||
</script>
|
||||
<script src="script.js" type="module"></script>
|
||||
</body>
|
||||
|
||||
</html>
|
||||
27
czv-wasm/examples/basic-demo/script.js
Normal file
27
czv-wasm/examples/basic-demo/script.js
Normal file
|
|
@ -0,0 +1,27 @@
|
|||
const progress = document.getElementById("progress");
|
||||
import init, * as czv from "../../pkg/czv.js";
|
||||
// Must run `await init()` or `initSync()` first for web use
|
||||
await init();
|
||||
const fileReader = new FileReader();
|
||||
fileReader.onloadstart = () => {
|
||||
if (progress)
|
||||
progress.style.display = "block";
|
||||
};
|
||||
fileReader.onloadend = () => {
|
||||
const rowCountElement = document.getElementById("row-count");
|
||||
const columnCountElement = document.getElementById("column-count");
|
||||
if (rowCountElement)
|
||||
rowCountElement.innerText = String(czv.rowCount({
|
||||
file_data: fileReader.result,
|
||||
}));
|
||||
if (columnCountElement)
|
||||
columnCountElement.innerText = String(czv.columnCount({ file_data: fileReader.result }));
|
||||
if (progress)
|
||||
progress.style.display = "none";
|
||||
};
|
||||
const input = document.getElementById("upload");
|
||||
if (input)
|
||||
input.addEventListener("change", () => {
|
||||
// @ts-ignore
|
||||
fileReader.readAsText(input.files[0]);
|
||||
});
|
||||
34
czv-wasm/examples/basic-demo/script.ts
Normal file
34
czv-wasm/examples/basic-demo/script.ts
Normal file
|
|
@ -0,0 +1,34 @@
|
|||
const progress = document.getElementById("progress");
|
||||
|
||||
import init, * as czv from "../../pkg/czv.js";
|
||||
// Must run `await init()` or `initSync()` first for web use
|
||||
await init();
|
||||
|
||||
const fileReader = new FileReader();
|
||||
|
||||
fileReader.onloadstart = () => {
|
||||
if (progress) progress.style.display = "block";
|
||||
};
|
||||
|
||||
fileReader.onloadend = () => {
|
||||
const rowCountElement = document.getElementById("row-count");
|
||||
const columnCountElement = document.getElementById("column-count");
|
||||
if (rowCountElement)
|
||||
rowCountElement.innerText = String(
|
||||
czv.rowCount({
|
||||
file_data: fileReader.result as string,
|
||||
})
|
||||
);
|
||||
if (columnCountElement)
|
||||
columnCountElement.innerText = String(
|
||||
czv.columnCount({ file_data: fileReader.result as string })
|
||||
);
|
||||
if (progress) progress.style.display = "none";
|
||||
};
|
||||
|
||||
const input = document.getElementById("upload");
|
||||
if (input)
|
||||
input.addEventListener("change", () => {
|
||||
// @ts-ignore
|
||||
fileReader.readAsText(input.files[0]);
|
||||
});
|
||||
4
czv-wasm/examples/node-demo/README.md
Normal file
4
czv-wasm/examples/node-demo/README.md
Normal file
|
|
@ -0,0 +1,4 @@
|
|||
To run this example:
|
||||
|
||||
1. Build the `nodejs` compatible bundle in the `czv-wasm` directory with `wasm-pack build -t nodejs --release --out-name czv-ts`.
|
||||
2. Run `bunx tsx sample.ts` (or `npx`, `pnpx`, etc.).
|
||||
12
czv-wasm/examples/node-demo/sample.ts
Normal file
12
czv-wasm/examples/node-demo/sample.ts
Normal file
|
|
@ -0,0 +1,12 @@
|
|||
import czv from "../../pkg/czv-ts";

// CSV input: one header row followed by three data rows.
// (The header previously ended with a stray comma, which gave it three
// fields while every data row has two.)
const options: czv.RowCountOptions = {
    file_data: `fruit,price
apple,2.50
banana,3.00
strawberry,1.50`,
};

// `include_header_row` is omitted, so it defaults to false and the header
// row is not part of the count.
const output = czv.rowCount(options);

console.log(output); // 3
|
||||
|
|
@ -1,32 +1,50 @@
|
|||
use crate::Result;
|
||||
use csv::ReaderBuilder;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use tsify_next::Tsify;
|
||||
use wasm_bindgen::prelude::*;
|
||||
|
||||
/// Options for `rowCount`.
|
||||
#[derive(Tsify, Serialize, Deserialize)]
|
||||
#[tsify(into_wasm_abi, from_wasm_abi)]
|
||||
pub struct RowCountOptions {
|
||||
/// CSV file data.
|
||||
pub file_data: String,
|
||||
|
||||
#[tsify(optional)]
|
||||
/// Specify whether to include the header row (first row) in the row count.
|
||||
/// Default is false.
|
||||
pub include_header_row: Option<bool>,
|
||||
}
|
||||
|
||||
/// Returns a count of the total number of rows.
|
||||
///
|
||||
/// @param {string} `options.file_data` CSV file data.
|
||||
/// @param {boolean | undefined} `options.include_header_row` Specify whether to include the header row (first row) in the row count. Default is false.
|
||||
/// @returns {number}
|
||||
#[wasm_bindgen(skip_jsdoc, js_name = rowCount)]
|
||||
pub fn row_count(file_data: String, include_header_row: Option<bool>) -> Result<usize> {
|
||||
pub fn row_count(options: RowCountOptions) -> Result<usize> {
|
||||
let mut rdr = ReaderBuilder::new();
|
||||
|
||||
rdr.has_headers(!include_header_row.unwrap_or(false));
|
||||
return Ok(rdr.from_reader(file_data.as_bytes()).records().count());
|
||||
rdr.has_headers(!options.include_header_row.unwrap_or(false));
|
||||
|
||||
Ok(rdr
|
||||
.from_reader(options.file_data.as_bytes())
|
||||
.records()
|
||||
.count())
|
||||
}
|
||||
|
||||
/// Options for `columnCount`.
|
||||
#[derive(Tsify, Serialize, Deserialize)]
|
||||
#[tsify(into_wasm_abi, from_wasm_abi)]
|
||||
pub struct ColumnCountOptions {
|
||||
/// CSV file data.
|
||||
pub file_data: String,
|
||||
}
|
||||
|
||||
/// Returns a count of the total number of columns (fields).
|
||||
///
|
||||
/// ## Arguments
|
||||
///
|
||||
/// @param {string} `options.file_data` CSV file data.
|
||||
#[wasm_bindgen(skip_jsdoc, js_name = columnCount)]
|
||||
pub fn column_count(file_data: Option<String>) -> Result<usize> {
|
||||
pub fn column_count(options: ColumnCountOptions) -> Result<usize> {
|
||||
let rdr = ReaderBuilder::new();
|
||||
|
||||
if let Some(file_data) = file_data {
|
||||
return Ok(rdr.from_reader(file_data.as_bytes()).headers()?.len());
|
||||
} else {
|
||||
bail!("Could not determine a file path or file data for column_count_builder.");
|
||||
}
|
||||
Ok(rdr
|
||||
.from_reader(options.file_data.as_bytes())
|
||||
.headers()?
|
||||
.len())
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,3 +1,45 @@
|
|||
//! # czv
|
||||
//!
|
||||
//! WASM library for [czv](https://github.com/rzmk/czv). CSV operations library for data engineering/analysis tasks.
|
||||
//!
|
||||
//! # Example
|
||||
//!
|
||||
//! Let's say we want to print the total number of rows in our data, including the header row:
|
||||
//!
|
||||
//! ```js
|
||||
//! import init, * as czv from "czv";
|
||||
//! // Must run `await init()` or `initSync()` first for web use
|
||||
//! await init();
|
||||
//!
|
||||
//! const data = `fruits,price
|
||||
//! apple,2.50
|
||||
//! banana,3.00
|
||||
//! strawberry,1.50`;
|
||||
//!
|
||||
//! const output = czv.rowCount({
|
||||
//! file_data: data,
|
||||
//! include_header_row: true,
|
||||
//! });
|
||||
//!
|
||||
//! console.log(output);
|
||||
//! ```
|
||||
//!
|
||||
//! For a full website example see the example's source code here: <https://github.com/rzmk/czv/tree/main/czv-wasm/examples/basic-demo>
|
||||
//!
|
||||
//! # Links
|
||||
//!
|
||||
//! - czv GitHub repository: <https://github.com/rzmk/czv>
|
||||
//! - Rust: [crates.io/crates/czv](https://crates.io/crates/czv) ([source code](https://github.com/rzmk/czv/tree/main/czv))
|
||||
//! - WebAssembly (JavaScript/TypeScript): [npmjs.com/package/czv](https://www.npmjs.com/package/czv) ([source code](https://github.com/rzmk/czv/tree/main/czv-wasm))
|
||||
//! - Python: [pypi.org/project/czv](https://pypi.org/project/czv/) ([source code](https://github.com/rzmk/czv/tree/main/czv-python))
|
||||
|
||||
#![allow(
|
||||
// https://github.com/madonoharu/tsify/issues/42
|
||||
non_snake_case,
|
||||
// https://github.com/rustwasm/wasm-bindgen/issues/3945
|
||||
clippy::empty_docs
|
||||
)]
|
||||
|
||||
use wasm_bindgen::JsValue;
|
||||
|
||||
// Error-handling helpers
|
||||
|
|
@ -11,9 +53,15 @@ impl From<csv::Error> for CzvError {
|
|||
}
|
||||
}
|
||||
|
||||
impl Into<JsValue> for CzvError {
|
||||
fn into(self) -> JsValue {
|
||||
JsValue::from_str(self.to_string().as_str())
|
||||
impl From<serde_wasm_bindgen::Error> for CzvError {
|
||||
fn from(value: serde_wasm_bindgen::Error) -> Self {
|
||||
value.into()
|
||||
}
|
||||
}
|
||||
|
||||
impl From<CzvError> for JsValue {
|
||||
fn from(val: CzvError) -> Self {
|
||||
JsValue::from_str(val.to_string().as_str())
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1,4 +1,5 @@
|
|||
use czv_wasm;
|
||||
use czv_wasm::count::RowCountOptions;
|
||||
use czv_wasm::Result;
|
||||
use wasm_bindgen_test::*;
|
||||
wasm_bindgen_test::wasm_bindgen_test_configure!(run_in_browser);
|
||||
|
|
@ -6,14 +7,15 @@ wasm_bindgen_test::wasm_bindgen_test_configure!(run_in_browser);
|
|||
#[test]
|
||||
#[wasm_bindgen_test]
|
||||
fn basic() -> Result<()> {
|
||||
let result = czv_wasm::count::row_count(
|
||||
"fruit,price
|
||||
let options = RowCountOptions {
|
||||
file_data: "fruit,price
|
||||
apple,2.00
|
||||
banana,1.50
|
||||
strawberry,3.00"
|
||||
.to_string(),
|
||||
Some(false),
|
||||
)?;
|
||||
include_header_row: Some(false),
|
||||
};
|
||||
let result = czv_wasm::count::row_count(options)?;
|
||||
assert_eq!(result, 3);
|
||||
Ok(())
|
||||
}
|
||||
|
|
|
|||
|
|
@ -2,8 +2,12 @@
|
|||
|
||||
Rust library for [czv](https://github.com/rzmk/czv). czv is a library of CSV-related operations for data engineering and analysis tasks.
|
||||
|
||||
- For a WebAssembly (JavaScript, TypeScript) library see [czv-wasm](https://github.com/rzmk/czv/tree/main/czv-wasm).
|
||||
- For a Python library see [czv-python](https://github.com/rzmk/czv/tree/main/czv-python).
|
||||
## Links
|
||||
|
||||
- czv GitHub repository: <https://github.com/rzmk/czv>
|
||||
- Rust: [crates.io/crates/czv](https://crates.io/crates/czv) ([source code](https://github.com/rzmk/czv/tree/main/czv))
|
||||
- WebAssembly (JavaScript/TypeScript): [npmjs.com/package/czv](https://www.npmjs.com/package/czv) ([source code](https://github.com/rzmk/czv/tree/main/czv-wasm))
|
||||
- Python: [pypi.org/project/czv](https://pypi.org/project/czv/) ([source code](https://github.com/rzmk/czv/tree/main/czv-python))
|
||||
|
||||
## Usage
|
||||
|
||||
|
|
@ -16,7 +20,7 @@ cargo install czv
|
|||
Let's say we want to print the total number of rows in a 4x3 CSV file `fruits.csv` including the header row:
|
||||
|
||||
```rust
|
||||
use czv::{count::RowCount, Result};
|
||||
use czv::{RowCount, Result};
|
||||
|
||||
fn main() -> Result<()> {
|
||||
let data = "\
|
||||
|
|
|
|||
|
|
@ -42,12 +42,16 @@ pub fn row_count(
|
|||
|
||||
#[derive(TypedBuilder)]
|
||||
#[builder(doc, builder_method(name=new, doc="Returns a `RowCountBuilder` to customize row count options by running other methods before getting the row count with the `execute` method."), build_method(vis="", name=__build))]
|
||||
/// Returns a count of the total number of rows.
|
||||
///
|
||||
/// The entry point for setting up a [`czv::count::RowCountBuilder`](crate::count::RowCountBuilder) by running [`RowCount::new()`](crate::count::RowCount::new).
|
||||
///
|
||||
/// For example:
|
||||
/// # Example
|
||||
///
|
||||
/// Let's say we want to print the total number of non-header rows in our data:
|
||||
///
|
||||
/// ```rust
|
||||
/// use czv::{count::RowCount, Result};
|
||||
/// use czv::{RowCount, Result};
|
||||
///
|
||||
/// fn main() -> Result<()> {
|
||||
/// let data = "\
|
||||
|
|
@ -113,6 +117,26 @@ impl<
|
|||
///
|
||||
/// See [`czv::count::ColumnCount`](crate::count::ColumnCount) for a builder version (recommended) of this function.
|
||||
///
|
||||
/// # Example
|
||||
///
|
||||
/// Let's say we want to print the total number of columns in our data:
|
||||
///
|
||||
/// ```rust
|
||||
/// use czv::{ColumnCount, Result};
|
||||
///
|
||||
/// fn main() -> Result<()> {
|
||||
/// let data = "\
|
||||
/// fruits,price
|
||||
/// apple,2.50
|
||||
/// banana,3.00
|
||||
/// strawberry,1.50
|
||||
/// ";
|
||||
/// let output = ColumnCount::new().file_data(data).execute()?;
|
||||
/// println!("{output}"); // 2
|
||||
/// Ok(())
|
||||
/// }
|
||||
/// ```
|
||||
///
|
||||
/// ## Arguments
|
||||
///
|
||||
/// * `file_path` - CSV file path (alternative to `file_data`).
|
||||
|
|
@ -122,7 +146,7 @@ pub fn column_count(file_path: Option<PathBuf>, file_data: Option<String>) -> Re
|
|||
|
||||
// file_path
|
||||
if let Some(file_path) = file_path {
|
||||
return Ok(rdr.from_path(file_path)?.headers()?.len());
|
||||
Ok(rdr.from_path(file_path)?.headers()?.len())
|
||||
}
|
||||
// file_data
|
||||
else if let Some(file_data) = file_data {
|
||||
|
|
@ -136,6 +160,8 @@ pub fn column_count(file_path: Option<PathBuf>, file_data: Option<String>) -> Re
|
|||
|
||||
#[derive(TypedBuilder)]
|
||||
#[builder(doc, builder_method(name=new, doc="Returns a `ColumnCountBuilder` to customize column count options by running other methods before getting the column count with the `execute` method."), build_method(vis="", name=__build))]
|
||||
/// Returns a count of the total number of columns (fields).
|
||||
///
|
||||
/// The entry point for setting up a [`czv::count::ColumnCountBuilder`](crate::count::ColumnCountBuilder) by running [`ColumnCount::new()`](crate::count::ColumnCount::new).
|
||||
///
|
||||
/// For example:
|
||||
|
|
|
|||
|
|
@ -2,10 +2,12 @@
|
|||
//!
|
||||
//! CSV operations library for data engineering/analysis tasks.
|
||||
//!
|
||||
//! ## Example
|
||||
//! # Example
|
||||
//!
|
||||
//! Let's say we want to print the total number of non-header rows in our data:
|
||||
//!
|
||||
//! ```rust
|
||||
//! use czv::{count::RowCount, Result};
|
||||
//! use czv::{RowCount, Result};
|
||||
//!
|
||||
//! fn main() -> Result<()> {
|
||||
//! let data = "\
|
||||
|
|
@ -20,21 +22,35 @@
|
|||
//! }
|
||||
//! ```
|
||||
//!
|
||||
//! ## Links
|
||||
//! # Usage
|
||||
//!
|
||||
//! - Source code: <https://github.com/rzmk/czv>
|
||||
//! - czv-wasm: <https://github.com/rzmk/czv>
|
||||
//! - czv-python: <https://github.com/rzmk/czv>
|
||||
//! It is recommended to use the builder structs rather than functions though both are provided.
|
||||
//!
|
||||
//! The builder structs are provided at the top-level for ease of use.
|
||||
//!
|
||||
//! For example use the [czv::RowCount](crate::RowCount) struct rather than the [czv::count::row_count](crate::count::row_count) function.
|
||||
//!
|
||||
//! # Links
|
||||
//!
|
||||
//! - czv GitHub repository: <https://github.com/rzmk/czv>
|
||||
//! - Rust: [crates.io/crates/czv](https://crates.io/crates/czv) ([source code](https://github.com/rzmk/czv/tree/main/czv))
|
||||
//! - WebAssembly (JavaScript/TypeScript): [npmjs.com/package/czv](https://www.npmjs.com/package/czv) ([source code](https://github.com/rzmk/czv/tree/main/czv-wasm))
|
||||
//! - Python: [pypi.org/project/czv](https://pypi.org/project/czv/) ([source code](https://github.com/rzmk/czv/tree/main/czv-python))
|
||||
|
||||
/// Counting operations including row count and column count.
|
||||
pub mod count;
|
||||
#[doc(inline)]
|
||||
pub use count::{ColumnCount, RowCount};
|
||||
/// Extract a section of rows.
|
||||
pub mod slice;
|
||||
// pub mod slice;
|
||||
// #[doc(inline)]
|
||||
// pub use slice::Slice;
|
||||
|
||||
#[allow(dead_code)]
|
||||
// Error-handling helpers
|
||||
#[allow(dead_code)]
|
||||
#[derive(thiserror::Error, Debug)]
|
||||
#[error("{0}")]
|
||||
/// Common catch-all error type based on [anyhow::Error].
|
||||
pub struct CzvError(pub anyhow::Error);
|
||||
|
||||
impl From<anyhow::Error> for CzvError {
|
||||
|
|
@ -49,10 +65,23 @@ impl From<csv::Error> for CzvError {
|
|||
}
|
||||
}
|
||||
|
||||
/// Common Result type based on [anyhow::Result] and [czv::CzvError](crate::CzvError).
|
||||
pub type Result<T, E = CzvError> = anyhow::Result<T, E>;
|
||||
|
||||
#[macro_export]
|
||||
#[allow(unused_macros)]
|
||||
/// Function-like macro you may pass a `&str` to return a [czv::CzvError](crate::CzvError).
|
||||
///
|
||||
/// For example:
|
||||
///
|
||||
/// ```should_panic
|
||||
/// use czv::{bail, CzvError, Result};
|
||||
///
|
||||
/// fn main() -> Result<()> {
|
||||
/// bail!("This is an example of an error message using `bail!`.");
|
||||
/// }
|
||||
/// ```
|
||||
#[allow(clippy::crate_in_macro_def)]
|
||||
macro_rules! bail {
|
||||
($err:expr $(,)?) => {
|
||||
return Err(crate::CzvError(anyhow::anyhow!($err)))
|
||||
|
|
|
|||
139
czv/src/slice.rs
139
czv/src/slice.rs
|
|
@ -21,20 +21,20 @@ use typed_builder::TypedBuilder;
|
|||
/// Let's print the first two non-header rows in the CSV file:
|
||||
///
|
||||
/// ```rust
|
||||
/// use czv::{
|
||||
/// Result,
|
||||
/// slice::Slice
|
||||
/// };
|
||||
/// use czv::Result;
|
||||
///
|
||||
/// fn main() -> Result<()> {
|
||||
/// let file_path = "tests/resources/fruits.csv";
|
||||
///
|
||||
/// let output: String = Slice::new()
|
||||
/// .file_path(file_path)
|
||||
/// .start(0)
|
||||
/// .end(2) // exclusive
|
||||
/// .include_header_row(false)
|
||||
/// .execute()?;
|
||||
/// let output: String = czv::slice::slice(
|
||||
/// Some(file_path.into()), // file_path
|
||||
/// None, // file_data
|
||||
/// Some(0), // start
|
||||
/// Some(2), // end
|
||||
/// None, // length
|
||||
/// None, // index
|
||||
/// false // include_header_row
|
||||
/// )?;
|
||||
///
|
||||
/// println!("{output}");
|
||||
///
|
||||
|
|
@ -106,14 +106,23 @@ pub fn slice(
|
|||
}
|
||||
None => None,
|
||||
};
|
||||
let end_line = match end {
|
||||
Some(x) => {
|
||||
let end_line = match (end, length) {
|
||||
(Some(e), None) => {
|
||||
if x >= records_count as i32 {
|
||||
bail!("end value {x} cannot be greater than or equal to the number of records.")
|
||||
} else if x >= 0 {
|
||||
Some(x as usize)
|
||||
bail!("end value {e} cannot be greater than or equal to the number of records.")
|
||||
} else if e >= 0 {
|
||||
Some(e as usize)
|
||||
} else {
|
||||
Some((records_count as i32 + x) as usize)
|
||||
Some((records_count as i32 + e) as usize)
|
||||
}
|
||||
}
|
||||
(None, Some(l)) => {
|
||||
if l >= records_count as i32 {
|
||||
bail!("end value {l} cannot be greater than or equal to the number of records.")
|
||||
} else if l >= 0 {
|
||||
Some(l as usize)
|
||||
} else {
|
||||
Some((records_count as i32 + l) as usize)
|
||||
}
|
||||
}
|
||||
None => None,
|
||||
|
|
@ -222,27 +231,108 @@ pub fn slice(
|
|||
}
|
||||
|
||||
#[derive(TypedBuilder)]
|
||||
#[builder(builder_method(name=new), build_method(vis="", name=__build))]
|
||||
#[builder(doc, builder_method(name=new, doc="Returns a [`SliceBuilder`](crate::slice::SliceBuilder) to customize row count options by running other methods before getting the sliced rows with the [`execute`](crate::slice::SliceBuilder::execute) method."), build_method(vis="", name=__build))]
|
||||
/// Returns a slice of rows from the CSV data.
|
||||
///
|
||||
/// ## Example
|
||||
///
|
||||
/// We have a file `fruits.csv` at the path `tests/resources/fruits.csv`:
|
||||
///
|
||||
/// ```csv
|
||||
/// fruit,price
|
||||
/// apple,2.50
|
||||
/// banana,3.00
|
||||
/// strawberry,1.50
|
||||
/// ```
|
||||
///
|
||||
/// Let's print the first two non-header rows in the CSV file:
|
||||
///
|
||||
/// ```rust
|
||||
/// use czv::{Result, Slice};
|
||||
///
|
||||
/// fn main() -> Result<()> {
|
||||
/// let file_path = "tests/resources/fruits.csv";
|
||||
///
|
||||
/// let output: String = Slice::new()
|
||||
/// .file_path(file_path)
|
||||
/// .start(0)
|
||||
/// .end(2) // exclusive
|
||||
/// .include_header_row(false)
|
||||
/// .execute()?;
|
||||
///
|
||||
/// println!("{output}");
|
||||
///
|
||||
/// Ok(())
|
||||
/// }
|
||||
/// ```
|
||||
///
|
||||
/// Printed to the terminal we get:
|
||||
///
|
||||
/// ```console
|
||||
/// apple,2.50
|
||||
/// banana,3.00
|
||||
/// ```
|
||||
///
|
||||
/// ## Notes
|
||||
///
|
||||
/// - You may not use `file_path` and `file_data` together, only one may be specified.
|
||||
/// - You may not use `end` and `length` together, only one may be specified.
|
||||
/// - You may not use `index` with any of `start`, `end`, or `length`.
|
||||
pub struct Slice {
|
||||
#[builder(default, setter(strip_option, into))]
|
||||
#[builder(
|
||||
default,
|
||||
setter(
|
||||
doc = "CSV file path (alternative to `file_data`).",
|
||||
strip_option,
|
||||
into
|
||||
)
|
||||
)]
|
||||
file_path: Option<PathBuf>,
|
||||
|
||||
#[builder(default, setter(strip_option, into))]
|
||||
#[builder(
|
||||
default,
|
||||
setter(
|
||||
doc = "CSV file data (alternative to `file_path`).",
|
||||
strip_option,
|
||||
into
|
||||
)
|
||||
)]
|
||||
file_data: Option<String>,
|
||||
|
||||
#[builder(default, setter(strip_option))]
|
||||
#[builder(
|
||||
default,
|
||||
setter(
|
||||
doc = "The index of the record to slice from (0-indexed). If negative, starts from the last record.",
|
||||
strip_option
|
||||
)
|
||||
)]
|
||||
start: Option<i32>,
|
||||
|
||||
#[builder(default, setter(strip_option))]
|
||||
#[builder(
|
||||
default,
|
||||
setter(doc = "The index of the record to slice to.", strip_option)
|
||||
)]
|
||||
end: Option<i32>,
|
||||
|
||||
#[builder(default, setter(strip_option))]
|
||||
#[builder(
|
||||
default,
|
||||
setter(doc = "The length of the slice (alternative to `end`).", strip_option)
|
||||
)]
|
||||
length: Option<i32>,
|
||||
|
||||
#[builder(default, setter(strip_option))]
|
||||
#[builder(
|
||||
default,
|
||||
setter(
|
||||
doc = "Slice a single record. If negative, starts from the last record.",
|
||||
strip_option
|
||||
)
|
||||
)]
|
||||
index: Option<i32>,
|
||||
|
||||
#[builder(default = false)]
|
||||
#[builder(
|
||||
default = false,
|
||||
setter(doc = "Specify whether to include the header row (first row) in the records.")
|
||||
)]
|
||||
include_header_row: bool,
|
||||
}
|
||||
|
||||
|
|
@ -266,6 +356,7 @@ impl<
|
|||
__include_header_row,
|
||||
)>
|
||||
{
|
||||
/// Returns the sliced rows.
|
||||
pub fn execute(self) -> Result<String> {
|
||||
let builder = self.__build();
|
||||
slice(
|
||||
|
|
|
|||
|
|
@ -1,75 +1,100 @@
|
|||
use czv;
|
||||
use czv::Result;
|
||||
// use czv;
|
||||
// use czv::Result;
|
||||
|
||||
#[test]
|
||||
fn test_slice_start_end() -> Result<()> {
|
||||
let cases = vec![(
|
||||
"tests/resources/fruits.csv",
|
||||
1,
|
||||
3,
|
||||
"banana,3.00\nstrawberry,1.50".to_string(),
|
||||
)];
|
||||
for (file_name, start, end, expected) in cases {
|
||||
let got = czv::slice::slice(
|
||||
Some(file_name.into()),
|
||||
None,
|
||||
Some(start),
|
||||
Some(end),
|
||||
None,
|
||||
None,
|
||||
false,
|
||||
)?;
|
||||
assert_eq!(expected, got);
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
// #[test]
|
||||
// fn test_slice() -> Result<()> {
|
||||
// let got = czv::Slice::new()
|
||||
// .file_path("tests/resources/fruits.csv")
|
||||
// .execute()?;
|
||||
// let expected = "fruit,price\napple,2.50\nbanana,3.00\nstrawberry,1.50".to_string();
|
||||
// assert_eq!(expected, got);
|
||||
// Ok(())
|
||||
// }
|
||||
|
||||
#[test]
|
||||
fn test_slice_start_end_data() -> Result<()> {
|
||||
let cases = vec![(
|
||||
"fruit,price\napple,2.50\nbanana,3.00\nstrawberry,1.50".to_string(),
|
||||
1,
|
||||
3,
|
||||
"banana,3.00\nstrawberry,1.50".to_string(),
|
||||
)];
|
||||
for (file_data, start, end, expected) in cases {
|
||||
let got = czv::slice::slice(
|
||||
None,
|
||||
Some(file_data),
|
||||
Some(start),
|
||||
Some(end),
|
||||
None,
|
||||
None,
|
||||
false,
|
||||
)?;
|
||||
assert_eq!(expected, got);
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
// #[test]
|
||||
// fn test_slice_start_end() -> Result<()> {
|
||||
// let cases = vec![(
|
||||
// "tests/resources/fruits.csv",
|
||||
// 1,
|
||||
// 3,
|
||||
// "banana,3.00\nstrawberry,1.50".to_string(),
|
||||
// )];
|
||||
// for (file_name, start, end, expected) in cases {
|
||||
// let got = czv::slice::slice(
|
||||
// Some(file_name.into()),
|
||||
// None,
|
||||
// Some(start),
|
||||
// Some(end),
|
||||
// None,
|
||||
// None,
|
||||
// false,
|
||||
// )?;
|
||||
// assert_eq!(expected, got);
|
||||
// }
|
||||
// Ok(())
|
||||
// }
|
||||
|
||||
#[test]
|
||||
fn test_slice_start_0_end_3() -> Result<()> {
|
||||
let expected = "apple,2.50\nbanana,3.00".to_string();
|
||||
let got: String = czv::slice::Slice::new()
|
||||
.file_path("tests/resources/fruits.csv")
|
||||
.start(0)
|
||||
.end(2) // exclusive
|
||||
.include_header_row(false)
|
||||
.execute()?;
|
||||
// #[test]
|
||||
// fn test_slice_start_end_data() -> Result<()> {
|
||||
// let cases = vec![(
|
||||
// "fruit,price\napple,2.50\nbanana,3.00\nstrawberry,1.50".to_string(),
|
||||
// 1,
|
||||
// 3,
|
||||
// "banana,3.00\nstrawberry,1.50".to_string(),
|
||||
// )];
|
||||
// for (file_data, start, end, expected) in cases {
|
||||
// let got = czv::slice::slice(
|
||||
// None,
|
||||
// Some(file_data),
|
||||
// Some(start),
|
||||
// Some(end),
|
||||
// None,
|
||||
// None,
|
||||
// false,
|
||||
// )?;
|
||||
// assert_eq!(expected, got);
|
||||
// }
|
||||
// Ok(())
|
||||
// }
|
||||
|
||||
assert_eq!(expected, got);
|
||||
Ok(())
|
||||
}
|
||||
// #[test]
|
||||
// fn test_slice_start_0_end_3() -> Result<()> {
|
||||
// let expected = "apple,2.50\nbanana,3.00".to_string();
|
||||
// let got: String = czv::slice::Slice::new()
|
||||
// .file_path("tests/resources/fruits.csv")
|
||||
// .start(0)
|
||||
// .end(2) // exclusive
|
||||
// .include_header_row(false)
|
||||
// .execute()?;
|
||||
|
||||
#[test]
|
||||
fn test_slice_index_2() -> Result<()> {
|
||||
let expected = "strawberry,1.50".to_string();
|
||||
let got: String = czv::slice::Slice::new()
|
||||
.file_path("tests/resources/fruits.csv")
|
||||
.index(2)
|
||||
.include_header_row(false)
|
||||
.execute()?;
|
||||
// assert_eq!(expected, got);
|
||||
// Ok(())
|
||||
// }
|
||||
|
||||
assert_eq!(expected, got);
|
||||
Ok(())
|
||||
}
|
||||
// #[test]
|
||||
// fn test_slice_index_2() -> Result<()> {
|
||||
// let expected = "strawberry,1.50".to_string();
|
||||
// let got: String = czv::slice::Slice::new()
|
||||
// .file_path("tests/resources/fruits.csv")
|
||||
// .index(2)
|
||||
// .include_header_row(false)
|
||||
// .execute()?;
|
||||
|
||||
// assert_eq!(expected, got);
|
||||
// Ok(())
|
||||
// }
|
||||
|
||||
// #[test]
|
||||
// fn test_slice_index_override() -> Result<()> {
|
||||
// let got = czv::slice::Slice::new()
|
||||
// .file_path("tests/resources/fruits.csv")
|
||||
// .start(1)
|
||||
// .end(3)
|
||||
// .index(2)
|
||||
// .include_header_row(false)
|
||||
// .execute();
|
||||
|
||||
// // Error: CzvError(Cannot use index with start, end, or length.)
|
||||
// assert!(got.is_err());
|
||||
// Ok(())
|
||||
// }
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue