mirror of
https://github.com/rzmk/czv.git
synced 2025-12-19 08:09:24 +00:00
refactor: use czv in czv-python
- Add `file_path` for czv-python count operations
- Refactor tests based on file path instead of data
- Use thiserror in czv
- Update examples to reflect changes
This commit is contained in:
parent
9799ab694b
commit
ce260e9491
14 changed files with 79 additions and 55 deletions
2
Cargo.lock
generated
2
Cargo.lock
generated
|
|
@ -223,6 +223,7 @@ dependencies = [
|
||||||
"anyhow",
|
"anyhow",
|
||||||
"criterion",
|
"criterion",
|
||||||
"csv",
|
"csv",
|
||||||
|
"thiserror",
|
||||||
"typed-builder",
|
"typed-builder",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
@ -232,6 +233,7 @@ version = "0.0.0"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"anyhow",
|
"anyhow",
|
||||||
"csv",
|
"csv",
|
||||||
|
"czv",
|
||||||
"pyo3",
|
"pyo3",
|
||||||
"thiserror",
|
"thiserror",
|
||||||
]
|
]
|
||||||
|
|
|
||||||
0
LICENSE
Normal file
0
LICENSE
Normal file
|
|
@ -67,7 +67,7 @@ apple,2.50
|
||||||
banana,3.00
|
banana,3.00
|
||||||
strawberry,1.50"""
|
strawberry,1.50"""
|
||||||
|
|
||||||
output = czv.row_count(data, False)
|
output = czv.row_count(file_data=data)
|
||||||
|
|
||||||
print(output)
|
print(output)
|
||||||
```
|
```
|
||||||
|
|
|
||||||
|
|
@ -12,5 +12,6 @@ crate-type = ["cdylib", "rlib"]
|
||||||
[dependencies]
|
[dependencies]
|
||||||
anyhow = "1.0.86"
|
anyhow = "1.0.86"
|
||||||
csv = "1.3.0"
|
csv = "1.3.0"
|
||||||
|
czv = { path = "../czv" }
|
||||||
pyo3 = { version = "0.21.2", features = ["extension-module"] }
|
pyo3 = { version = "0.21.2", features = ["extension-module"] }
|
||||||
thiserror = "1.0.61"
|
thiserror = "1.0.61"
|
||||||
|
|
|
||||||
|
|
@ -1,24 +1,26 @@
|
||||||
# czv-python
|
# czv-python
|
||||||
|
|
||||||
Python library for [czv](https://github.com/rzmk/czv). czv is a library of utility functions for CSV-related data engineering and analysis tasks.
|
Python library for [czv](https://github.com/rzmk/czv). czv is a library of CSV-related operations for data engineering and analysis tasks.
|
||||||
|
|
||||||
|
- For a Rust library see [czv](https://github.com/rzmk/czv/tree/main/czv).
|
||||||
|
- For a WebAssembly (JavaScript, TypeScript) library see [czv-wasm](https://github.com/rzmk/czv/tree/main/czv-wasm).
|
||||||
|
|
||||||
## Installation and example
|
## Installation and example
|
||||||
|
|
||||||
|
To install `czv`, run:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
pip install czv
|
pip install czv
|
||||||
```
|
```
|
||||||
|
|
||||||
|
Let's say we want to print the total number of rows in a 4x2 CSV file `fruits.csv` including the header row:
|
||||||
|
|
||||||
```python
|
```python
|
||||||
import czv
|
import czv
|
||||||
|
|
||||||
data = """fruits,price
|
output = czv.row_count(file_path="fruits.csv", include_header_row=True)
|
||||||
apple,2.50
|
|
||||||
banana,3.00
|
|
||||||
strawberry,1.50"""
|
|
||||||
|
|
||||||
output = czv.row_count(data, False)
|
print(output) # 4
|
||||||
|
|
||||||
print(output)
|
|
||||||
```
|
```
|
||||||
|
|
||||||
## Development
|
## Development
|
||||||
|
|
|
||||||
|
|
@ -14,12 +14,7 @@ pip install czv
|
||||||
```python
|
```python
|
||||||
from czv import row_count
|
from czv import row_count
|
||||||
|
|
||||||
data = \"""fruits,price
|
output = row_count(file_path="fruits.csv")
|
||||||
apple,2.50
|
|
||||||
banana,3.00
|
|
||||||
strawberry,1.50\"""
|
|
||||||
|
|
||||||
output = row_count(data, False)
|
|
||||||
|
|
||||||
print(output)
|
print(output)
|
||||||
```
|
```
|
||||||
|
|
@ -27,20 +22,23 @@ print(output)
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
def row_count(file_data: str, include_header_row: Optional[bool]) -> int:
|
def row_count(file_path: Optional[Path], file_data: Optional[str], include_header_row: Optional[bool]) -> int:
|
||||||
"""Returns a count of the total number of rows.
|
"""Returns a count of the total number of rows.
|
||||||
|
|
||||||
## Arguments
|
## Arguments
|
||||||
|
|
||||||
|
* `file_path` - CSV file path.
|
||||||
* `file_data` - CSV file data.
|
* `file_data` - CSV file data.
|
||||||
* `include_header_row` - Specify whether to include the header row (first row) in the row count. Default is false.
|
* `include_header_row` - Specify whether to include the header row (first row) in the row count. Default is false.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def column_count(file_data: str) -> int:
|
def column_count(file_path: Optional[Path], file_data: Optional[str]) -> int:
|
||||||
"""Returns a count of the total number of columns (fields).
|
"""Returns a count of the total number of columns (fields).
|
||||||
|
|
||||||
## Arguments
|
## Arguments
|
||||||
|
|
||||||
|
* `file_path` - CSV file path.
|
||||||
* `file_data` - CSV file data.
|
* `file_data` - CSV file data.
|
||||||
"""
|
"""
|
||||||
|
|
|
||||||
|
|
@ -5,6 +5,6 @@ apple,2.50
|
||||||
banana,3.00
|
banana,3.00
|
||||||
strawberry,1.50"""
|
strawberry,1.50"""
|
||||||
|
|
||||||
output = czv.row_count(data, False)
|
output = czv.row_count(file_data=data, include_header_row=True)
|
||||||
|
|
||||||
print(output)
|
print(output) # 4
|
||||||
|
|
|
||||||
|
|
@ -1,33 +1,21 @@
|
||||||
use crate::Result;
|
use crate::Result;
|
||||||
use csv::ReaderBuilder;
|
|
||||||
use pyo3::pyfunction;
|
use pyo3::pyfunction;
|
||||||
|
use std::path::PathBuf;
|
||||||
|
|
||||||
/// Returns a count of the total number of rows.
|
|
||||||
///
|
|
||||||
/// ## Arguments
|
|
||||||
///
|
|
||||||
/// * `file_data` - CSV file data.
|
|
||||||
/// * `include_header_row` - Specify whether to include the header row (first row) in the row count.
|
|
||||||
#[pyfunction]
|
#[pyfunction]
|
||||||
pub fn row_count(file_data: String, include_header_row: Option<bool>) -> Result<usize> {
|
pub fn row_count(
|
||||||
let mut rdr = ReaderBuilder::new();
|
file_path: Option<PathBuf>,
|
||||||
|
file_data: Option<String>,
|
||||||
rdr.has_headers(!include_header_row.unwrap_or(false));
|
include_header_row: Option<bool>,
|
||||||
return Ok(rdr.from_reader(file_data.as_bytes()).records().count());
|
) -> Result<usize> {
|
||||||
|
Ok(czv::count::row_count(
|
||||||
|
file_path,
|
||||||
|
file_data,
|
||||||
|
include_header_row.unwrap_or(false),
|
||||||
|
)?)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Returns a count of the total number of columns (fields).
|
|
||||||
///
|
|
||||||
/// ## Arguments
|
|
||||||
///
|
|
||||||
/// * `file_data` - CSV file data.
|
|
||||||
#[pyfunction]
|
#[pyfunction]
|
||||||
pub fn column_count(file_data: Option<String>) -> Result<usize> {
|
pub fn column_count(file_path: Option<PathBuf>, file_data: Option<String>) -> Result<usize> {
|
||||||
let rdr = ReaderBuilder::new();
|
Ok(czv::count::column_count(file_path, file_data)?)
|
||||||
|
|
||||||
if let Some(file_data) = file_data {
|
|
||||||
return Ok(rdr.from_reader(file_data.as_bytes()).headers()?.len());
|
|
||||||
} else {
|
|
||||||
bail!("Could not determine a file path or file data for column_count_builder.");
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -1,3 +1,4 @@
|
||||||
|
use ::czv::CzvError as OGError;
|
||||||
use pyo3::prelude::*;
|
use pyo3::prelude::*;
|
||||||
|
|
||||||
// Error-handling helpers
|
// Error-handling helpers
|
||||||
|
|
@ -5,6 +6,12 @@ use pyo3::prelude::*;
|
||||||
#[error("{0}")]
|
#[error("{0}")]
|
||||||
pub struct CzvError(anyhow::Error);
|
pub struct CzvError(anyhow::Error);
|
||||||
|
|
||||||
|
impl From<OGError> for CzvError {
|
||||||
|
fn from(value: OGError) -> Self {
|
||||||
|
value.into()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
impl From<pyo3::PyErr> for CzvError {
|
impl From<pyo3::PyErr> for CzvError {
|
||||||
fn from(value: pyo3::PyErr) -> Self {
|
fn from(value: pyo3::PyErr) -> Self {
|
||||||
value.into()
|
value.into()
|
||||||
|
|
|
||||||
|
|
@ -10,7 +10,7 @@ class TestCountFunc:
|
||||||
def test_count(self, file_name, expected):
|
def test_count(self, file_name, expected):
|
||||||
"""Count the total number of non-header rows."""
|
"""Count the total number of non-header rows."""
|
||||||
|
|
||||||
result = czv.row_count(test_data[file_name].read_text())
|
result = czv.row_count(file_path=test_data[file_name])
|
||||||
assert result == expected
|
assert result == expected
|
||||||
|
|
||||||
@pytest.mark.parametrize(
|
@pytest.mark.parametrize(
|
||||||
|
|
@ -20,5 +20,5 @@ class TestCountFunc:
|
||||||
def test_include_header_row(self, file_name, expected):
|
def test_include_header_row(self, file_name, expected):
|
||||||
"""Count the total number of rows including the header row."""
|
"""Count the total number of rows including the header row."""
|
||||||
|
|
||||||
result = czv.row_count(test_data[file_name].read_text(), include_header_row=True)
|
result = czv.row_count(file_path=test_data[file_name], include_header_row=True)
|
||||||
assert result == expected
|
assert result == expected
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,9 @@
|
||||||
# czv-wasm
|
# czv-wasm
|
||||||
|
|
||||||
WebAssembly (JavaScript and TypeScript) library for [czv](https://github.com/rzmk/czv). czv is a library of utility functions for CSV-related data engineering and analysis tasks.
|
WebAssembly (JavaScript and TypeScript) library for [czv](https://github.com/rzmk/czv). czv is a library of CSV-related operations for data engineering and analysis tasks.
|
||||||
|
|
||||||
|
- For a Rust library see [czv](https://github.com/rzmk/czv/tree/main/czv).
|
||||||
|
- For a Python library see [czv-python](https://github.com/rzmk/czv/tree/main/czv-python).
|
||||||
|
|
||||||
## Installation and example
|
## Installation and example
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -6,12 +6,13 @@ description = "Rust library for performing CSV-related operations for data engin
|
||||||
repository = "https://github.com/rzmk/czv"
|
repository = "https://github.com/rzmk/czv"
|
||||||
edition = "2021"
|
edition = "2021"
|
||||||
license = "MIT OR Apache-2.0"
|
license = "MIT OR Apache-2.0"
|
||||||
keywords = ["csv", "library"]
|
keywords = ["csv", "library", "data"]
|
||||||
categories = ["text-processing"]
|
categories = ["text-processing"]
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
anyhow = "1.0.86"
|
anyhow = "1.0.86"
|
||||||
csv = "1.3.0"
|
csv = "1.3.0"
|
||||||
|
thiserror = "1.0.61"
|
||||||
typed-builder = "0.18.2"
|
typed-builder = "0.18.2"
|
||||||
|
|
||||||
[dev-dependencies]
|
[dev-dependencies]
|
||||||
|
|
|
||||||
|
|
@ -1,11 +1,12 @@
|
||||||
# czv
|
# czv
|
||||||
|
|
||||||
Rust library for [czv](https://github.com/rzmk/czv). czv is a library of utility functions for CSV-related data engineering and analysis tasks.
|
Rust library for [czv](https://github.com/rzmk/czv). czv is a library of CSV-related operations for data engineering and analysis tasks.
|
||||||
|
|
||||||
|
- For a WebAssembly (JavaScript, TypeScript) library see [czv-wasm](https://github.com/rzmk/czv/tree/main/czv-wasm).
|
||||||
|
- For a Python library see [czv-python](https://github.com/rzmk/czv/tree/main/czv-python).
|
||||||
|
|
||||||
## Usage
|
## Usage
|
||||||
|
|
||||||
You must have [Rust](https://www.rust-lang.org/tools/install) and Cargo installed (Cargo may be additionally installed when you install Rust with `rustup`).
|
|
||||||
|
|
||||||
To install `czv`, run:
|
To install `czv`, run:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
|
|
@ -18,10 +19,12 @@ Let's say we want to print the total number of rows in a 4x3 CSV file `fruits.cs
|
||||||
use czv::{count::RowCount, Result};
|
use czv::{count::RowCount, Result};
|
||||||
|
|
||||||
fn main() -> Result<()> {
|
fn main() -> Result<()> {
|
||||||
let data = r#"fruits,price
|
let data = "\
|
||||||
|
fruits,price
|
||||||
apple,2.50
|
apple,2.50
|
||||||
banana,3.00
|
banana,3.00
|
||||||
strawberry,1.50"#;
|
strawberry,1.50
|
||||||
|
";
|
||||||
let output = RowCount::new()
|
let output = RowCount::new()
|
||||||
.file_data(data)
|
.file_data(data)
|
||||||
.include_header_row(true)
|
.include_header_row(true)
|
||||||
|
|
@ -54,3 +57,20 @@ cargo bench
|
||||||
```
|
```
|
||||||
|
|
||||||
For benchmarks we use [criterion.rs](https://github.com/bheisler/criterion.rs).
|
For benchmarks we use [criterion.rs](https://github.com/bheisler/criterion.rs).
|
||||||
|
|
||||||
|
## License
|
||||||
|
|
||||||
|
Licensed under either of
|
||||||
|
|
||||||
|
- Apache License, Version 2.0
|
||||||
|
([LICENSE-APACHE](LICENSE-APACHE) or http://www.apache.org/licenses/LICENSE-2.0)
|
||||||
|
- MIT license
|
||||||
|
([LICENSE-MIT](LICENSE-MIT) or http://opensource.org/licenses/MIT)
|
||||||
|
|
||||||
|
at your option.
|
||||||
|
|
||||||
|
## Contribution
|
||||||
|
|
||||||
|
Unless you explicitly state otherwise, any contribution intentionally submitted
|
||||||
|
for inclusion in the work by you, as defined in the Apache-2.0 license, shall be
|
||||||
|
dual licensed as above, without any additional terms or conditions.
|
||||||
|
|
|
||||||
|
|
@ -32,8 +32,10 @@ pub mod count;
|
||||||
pub mod slice;
|
pub mod slice;
|
||||||
|
|
||||||
#[allow(dead_code)]
|
#[allow(dead_code)]
|
||||||
#[derive(Debug)]
|
// Error-handling helpers
|
||||||
pub struct CzvError(anyhow::Error);
|
#[derive(thiserror::Error, Debug)]
|
||||||
|
#[error("{0}")]
|
||||||
|
pub struct CzvError(pub anyhow::Error);
|
||||||
|
|
||||||
impl From<anyhow::Error> for CzvError {
|
impl From<anyhow::Error> for CzvError {
|
||||||
fn from(value: anyhow::Error) -> Self {
|
fn from(value: anyhow::Error) -> Self {
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue