refactor: use czv in czv-python

- Add `file_path` for czv-python count operations
- Refactor tests based on file path instead of data
- Use thiserror in czv
- Update examples to reflect changes
This commit is contained in:
rzmk 2024-06-20 01:03:54 -04:00
parent 9799ab694b
commit ce260e9491
14 changed files with 79 additions and 55 deletions

2
Cargo.lock generated
View file

@ -223,6 +223,7 @@ dependencies = [
"anyhow", "anyhow",
"criterion", "criterion",
"csv", "csv",
"thiserror",
"typed-builder", "typed-builder",
] ]
@ -232,6 +233,7 @@ version = "0.0.0"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"csv", "csv",
"czv",
"pyo3", "pyo3",
"thiserror", "thiserror",
] ]

0
LICENSE Normal file
View file

View file

@ -67,7 +67,7 @@ apple,2.50
banana,3.00 banana,3.00
strawberry,1.50""" strawberry,1.50"""
output = czv.row_count(data, False) output = czv.row_count(file_data=data)
print(output) print(output)
``` ```

View file

@ -12,5 +12,6 @@ crate-type = ["cdylib", "rlib"]
[dependencies] [dependencies]
anyhow = "1.0.86" anyhow = "1.0.86"
csv = "1.3.0" csv = "1.3.0"
czv = { path = "../czv" }
pyo3 = { version = "0.21.2", features = ["extension-module"] } pyo3 = { version = "0.21.2", features = ["extension-module"] }
thiserror = "1.0.61" thiserror = "1.0.61"

View file

@ -1,24 +1,26 @@
# czv-python # czv-python
Python library for [czv](https://github.com/rzmk/czv). czv is a library of utility functions for CSV-related data engineering and analysis tasks. Python library for [czv](https://github.com/rzmk/czv). czv is a library of CSV-related operations for data engineering and analysis tasks.
- For a Rust library see [czv](https://github.com/rzmk/czv/tree/main/czv).
- For a WebAssembly (JavaScript, TypeScript) library see [czv-wasm](https://github.com/rzmk/czv/tree/main/czv-wasm).
## Installation and example ## Installation and example
To install `czv`, run:
```bash ```bash
pip install czv pip install czv
``` ```
Let's say we want to print the total number of rows in a 4x3 CSV file `fruits.csv` including the header row:
```python ```python
import czv import czv
data = """fruits,price output = czv.row_count(file_path="fruits.csv", include_header_row=True)
apple,2.50
banana,3.00
strawberry,1.50"""
output = czv.row_count(data, False) print(output) # 4
print(output)
``` ```
## Development ## Development

View file

@ -14,12 +14,7 @@ pip install czv
```python ```python
from czv import row_count from czv import row_count
data = \"""fruits,price output = row_count(file_path="fruits.csv")
apple,2.50
banana,3.00
strawberry,1.50\"""
output = row_count(data, False)
print(output) print(output)
``` ```
@ -27,20 +22,23 @@ print(output)
""" """
from typing import Optional from typing import Optional
from pathlib import Path
def row_count(file_data: str, include_header_row: Optional[bool]) -> int: def row_count(file_path: Optional[Path], file_data: Optional[str], include_header_row: Optional[bool]) -> int:
"""Returns a count of the total number of rows. """Returns a count of the total number of rows.
## Arguments ## Arguments
* `file_path` - CSV file path.
* `file_data` - CSV file data. * `file_data` - CSV file data.
* `include_header_row` - Specify whether to include the header row (first row) in the row count. Default is false. * `include_header_row` - Specify whether to include the header row (first row) in the row count. Default is false.
""" """
def column_count(file_data: str) -> int: def column_count(file_path: Optional[Path], file_data: Optional[str]) -> int:
"""Returns a count of the total number of columns (fields). """Returns a count of the total number of columns (fields).
## Arguments ## Arguments
* `file_path` - CSV file path.
* `file_data` - CSV file data. * `file_data` - CSV file data.
""" """

View file

@ -5,6 +5,6 @@ apple,2.50
banana,3.00 banana,3.00
strawberry,1.50""" strawberry,1.50"""
output = czv.row_count(data, False) output = czv.row_count(file_data=data, include_header_row=True)
print(output) print(output) # 4

View file

@ -1,33 +1,21 @@
use crate::Result; use crate::Result;
use csv::ReaderBuilder;
use pyo3::pyfunction; use pyo3::pyfunction;
use std::path::PathBuf;
/// Returns a count of the total number of rows.
///
/// ## Arguments
///
/// * `file_data` - CSV file data.
/// * `include_header_row` - Specify whether to include the header row (first row) in the row count.
#[pyfunction] #[pyfunction]
pub fn row_count(file_data: String, include_header_row: Option<bool>) -> Result<usize> { pub fn row_count(
let mut rdr = ReaderBuilder::new(); file_path: Option<PathBuf>,
file_data: Option<String>,
rdr.has_headers(!include_header_row.unwrap_or(false)); include_header_row: Option<bool>,
return Ok(rdr.from_reader(file_data.as_bytes()).records().count()); ) -> Result<usize> {
Ok(czv::count::row_count(
file_path,
file_data,
include_header_row.unwrap_or(false),
)?)
} }
/// Returns a count of the total number of columns (fields).
///
/// ## Arguments
///
/// * `file_data` - CSV file data.
#[pyfunction] #[pyfunction]
pub fn column_count(file_data: Option<String>) -> Result<usize> { pub fn column_count(file_path: Option<PathBuf>, file_data: Option<String>) -> Result<usize> {
let rdr = ReaderBuilder::new(); Ok(czv::count::column_count(file_path, file_data)?)
if let Some(file_data) = file_data {
return Ok(rdr.from_reader(file_data.as_bytes()).headers()?.len());
} else {
bail!("Could not determine a file path or file data for column_count_builder.");
}
} }

View file

@ -1,3 +1,4 @@
use ::czv::CzvError as OGError;
use pyo3::prelude::*; use pyo3::prelude::*;
// Error-handling helpers // Error-handling helpers
@ -5,6 +6,12 @@ use pyo3::prelude::*;
#[error("{0}")] #[error("{0}")]
pub struct CzvError(anyhow::Error); pub struct CzvError(anyhow::Error);
impl From<OGError> for CzvError {
fn from(value: OGError) -> Self {
value.into()
}
}
impl From<pyo3::PyErr> for CzvError { impl From<pyo3::PyErr> for CzvError {
fn from(value: pyo3::PyErr) -> Self { fn from(value: pyo3::PyErr) -> Self {
value.into() value.into()

View file

@ -10,7 +10,7 @@ class TestCountFunc:
def test_count(self, file_name, expected): def test_count(self, file_name, expected):
"""Count the total number of non-header rows.""" """Count the total number of non-header rows."""
result = czv.row_count(test_data[file_name].read_text()) result = czv.row_count(file_path=test_data[file_name])
assert result == expected assert result == expected
@pytest.mark.parametrize( @pytest.mark.parametrize(
@ -20,5 +20,5 @@ class TestCountFunc:
def test_include_header_row(self, file_name, expected): def test_include_header_row(self, file_name, expected):
"""Count the total number of rows including the header row.""" """Count the total number of rows including the header row."""
result = czv.row_count(test_data[file_name].read_text(), include_header_row=True) result = czv.row_count(file_path=test_data[file_name], include_header_row=True)
assert result == expected assert result == expected

View file

@ -1,6 +1,9 @@
# czv-wasm # czv-wasm
WebAssembly (JavaScript and TypeScript) library for [czv](https://github.com/rzmk/czv). czv is a library of utility functions for CSV-related data engineering and analysis tasks. WebAssembly (JavaScript and TypeScript) library for [czv](https://github.com/rzmk/czv). czv is a library of CSV-related operations for data engineering and analysis tasks.
- For a Rust library see [czv](https://github.com/rzmk/czv/tree/main/czv).
- For a Python library see [czv-python](https://github.com/rzmk/czv/tree/main/czv-python).
## Installation and example ## Installation and example

View file

@ -6,12 +6,13 @@ description = "Rust library for performing CSV-related operations for data engin
repository = "https://github.com/rzmk/czv" repository = "https://github.com/rzmk/czv"
edition = "2021" edition = "2021"
license = "MIT OR Apache-2.0" license = "MIT OR Apache-2.0"
keywords = ["csv", "library"] keywords = ["csv", "library", "data"]
categories = ["text-processing"] categories = ["text-processing"]
[dependencies] [dependencies]
anyhow = "1.0.86" anyhow = "1.0.86"
csv = "1.3.0" csv = "1.3.0"
thiserror = "1.0.61"
typed-builder = "0.18.2" typed-builder = "0.18.2"
[dev-dependencies] [dev-dependencies]

View file

@ -1,11 +1,12 @@
# czv # czv
Rust library for [czv](https://github.com/rzmk/czv). czv is a library of utility functions for CSV-related data engineering and analysis tasks. Rust library for [czv](https://github.com/rzmk/czv). czv is a library of CSV-related operations for data engineering and analysis tasks.
- For a WebAssembly (JavaScript, TypeScript) library see [czv-wasm](https://github.com/rzmk/czv/tree/main/czv-wasm).
- For a Python library see [czv-python](https://github.com/rzmk/czv/tree/main/czv-python).
## Usage ## Usage
You must have [Rust](https://www.rust-lang.org/tools/install) and Cargo installed (Cargo is installed alongside Rust when you use `rustup`).
To install `czv`, run: To install `czv`, run:
```bash ```bash
@ -18,10 +19,12 @@ Let's say we want to print the total number of rows in a 4x3 CSV file `fruits.cs
use czv::{count::RowCount, Result}; use czv::{count::RowCount, Result};
fn main() -> Result<()> { fn main() -> Result<()> {
let data = r#"fruits,price let data = "\
fruits,price
apple,2.50 apple,2.50
banana,3.00 banana,3.00
strawberry,1.50"#; strawberry,1.50
";
let output = RowCount::new() let output = RowCount::new()
.file_data(data) .file_data(data)
.include_header_row(true) .include_header_row(true)
@ -54,3 +57,20 @@ cargo bench
``` ```
For benchmarks we use [criterion.rs](https://github.com/bheisler/criterion.rs). For benchmarks we use [criterion.rs](https://github.com/bheisler/criterion.rs).
## License
Licensed under either of
- Apache License, Version 2.0
([LICENSE-APACHE](LICENSE-APACHE) or http://www.apache.org/licenses/LICENSE-2.0)
- MIT license
([LICENSE-MIT](LICENSE-MIT) or http://opensource.org/licenses/MIT)
at your option.
## Contribution
Unless you explicitly state otherwise, any contribution intentionally submitted
for inclusion in the work by you, as defined in the Apache-2.0 license, shall be
dual licensed as above, without any additional terms or conditions.

View file

@ -32,8 +32,10 @@ pub mod count;
pub mod slice; pub mod slice;
#[allow(dead_code)] #[allow(dead_code)]
#[derive(Debug)] // Error-handling helpers
pub struct CzvError(anyhow::Error); #[derive(thiserror::Error, Debug)]
#[error("{0}")]
pub struct CzvError(pub anyhow::Error);
impl From<anyhow::Error> for CzvError { impl From<anyhow::Error> for CzvError {
fn from(value: anyhow::Error) -> Self { fn from(value: anyhow::Error) -> Self {