mirror of
https://github.com/rzmk/czv.git
synced 2025-12-19 08:09:24 +00:00
feat: add czv, czv-wasm, and czv-python (init release)
This commit is contained in:
commit
9799ab694b
40 changed files with 70383 additions and 0 deletions
72
czv-python/.gitignore
vendored
Normal file
72
czv-python/.gitignore
vendored
Normal file
|
|
@ -0,0 +1,72 @@
|
|||
/target
|
||||
|
||||
# Byte-compiled / optimized / DLL files
|
||||
__pycache__/
|
||||
.pytest_cache/
|
||||
*.py[cod]
|
||||
|
||||
# C extensions
|
||||
*.so
|
||||
|
||||
# Distribution / packaging
|
||||
.Python
|
||||
.venv/
|
||||
env/
|
||||
bin/
|
||||
build/
|
||||
develop-eggs/
|
||||
dist/
|
||||
eggs/
|
||||
lib/
|
||||
lib64/
|
||||
parts/
|
||||
sdist/
|
||||
var/
|
||||
include/
|
||||
man/
|
||||
venv/
|
||||
*.egg-info/
|
||||
.installed.cfg
|
||||
*.egg
|
||||
|
||||
# Installer logs
|
||||
pip-log.txt
|
||||
pip-delete-this-directory.txt
|
||||
pip-selfcheck.json
|
||||
|
||||
# Unit test / coverage reports
|
||||
htmlcov/
|
||||
.tox/
|
||||
.coverage
|
||||
.cache
|
||||
nosetests.xml
|
||||
coverage.xml
|
||||
|
||||
# Translations
|
||||
*.mo
|
||||
|
||||
# Mr Developer
|
||||
.mr.developer.cfg
|
||||
.project
|
||||
.pydevproject
|
||||
|
||||
# Rope
|
||||
.ropeproject
|
||||
|
||||
# Django stuff:
|
||||
*.log
|
||||
*.pot
|
||||
|
||||
.DS_Store
|
||||
|
||||
# Sphinx documentation
|
||||
docs/_build/
|
||||
|
||||
# PyCharm
|
||||
.idea/
|
||||
|
||||
# VSCode
|
||||
.vscode/
|
||||
|
||||
# Pyenv
|
||||
.python-version
|
||||
16
czv-python/Cargo.toml
Normal file
16
czv-python/Cargo.toml
Normal file
|
|
@ -0,0 +1,16 @@
|
|||
[package]
|
||||
name = "czv-python"
|
||||
version = "0.0.0"
|
||||
authors = ["Mueez Khan"]
|
||||
description = "Python library for performing CSV-related functions for data engineering and analysis."
|
||||
repository = "https://github.com/rzmk/czv"
|
||||
edition = "2021"
|
||||
|
||||
[lib]
|
||||
crate-type = ["cdylib", "rlib"]
|
||||
|
||||
[dependencies]
|
||||
anyhow = "1.0.86"
|
||||
csv = "1.3.0"
|
||||
pyo3 = { version = "0.21.2", features = ["extension-module"] }
|
||||
thiserror = "1.0.61"
|
||||
50
czv-python/README.md
Normal file
50
czv-python/README.md
Normal file
|
|
@ -0,0 +1,50 @@
|
|||
# czv-python
|
||||
|
||||
Python library for [czv](https://github.com/rzmk/czv). czv is a library of utility functions for CSV-related data engineering and analysis tasks.
|
||||
|
||||
## Installation and example
|
||||
|
||||
```bash
|
||||
pip install czv
|
||||
```
|
||||
|
||||
```python
|
||||
import czv
|
||||
|
||||
data = """fruits,price
|
||||
apple,2.50
|
||||
banana,3.00
|
||||
strawberry,1.50"""
|
||||
|
||||
output = czv.row_count(data, False)
|
||||
|
||||
print(output)
|
||||
```
|
||||
|
||||
## Development
|
||||
|
||||
You'll need to have [maturin](https://github.com/PyO3/maturin/) and [uv](https://github.com/astral-sh/uv) installed. Set up a local virtual environment in the `czv-python` folder by running:
|
||||
|
||||
```bash
|
||||
uv venv
|
||||
```
|
||||
|
||||
Make sure to activate the virtual environment (instructions should be provided in your terminal after running the previous command).
|
||||
|
||||
Once you've activated the virtual environment, install dependencies by running:
|
||||
|
||||
```bash
|
||||
uv pip install -r requirements.txt
|
||||
```
|
||||
|
||||
### Build package in local environment
|
||||
|
||||
```bash
|
||||
maturin develop --uv --release
|
||||
```
|
||||
|
||||
### Run tests
|
||||
|
||||
```bash
|
||||
pytest
|
||||
```
|
||||
46
czv-python/czv.pyi
Normal file
46
czv-python/czv.pyi
Normal file
|
|
@ -0,0 +1,46 @@
|
|||
"""
|
||||
# czv
|
||||
|
||||
Python library for [czv](https://github.com/rzmk/czv). CSV content manipulation and analysis.
|
||||
|
||||
## Install
|
||||
|
||||
```bash
|
||||
pip install czv
|
||||
```
|
||||
|
||||
## Example
|
||||
|
||||
```python
|
||||
from czv import row_count
|
||||
|
||||
data = \"""fruits,price
|
||||
apple,2.50
|
||||
banana,3.00
|
||||
strawberry,1.50\"""
|
||||
|
||||
output = row_count(data, False)
|
||||
|
||||
print(output)
|
||||
```
|
||||
|
||||
"""
|
||||
|
||||
from typing import Optional
|
||||
|
||||
def row_count(file_data: str, include_header_row: Optional[bool] = None) -> int:
    """Returns a count of the total number of rows.

    ## Arguments

    * `file_data` - CSV file data.
    * `include_header_row` - Specify whether to include the header row (first row) in the row count. May be omitted; omitting it (or passing `None`) behaves like `False`.
    """
|
||||
|
||||
def column_count(file_data: str) -> int:
    """Count the columns (fields) of the given CSV data.

    ## Arguments

    * `file_data` - CSV file data.
    """
    ...
|
||||
10
czv-python/examples/simple_row_count.py
Normal file
10
czv-python/examples/simple_row_count.py
Normal file
|
|
@ -0,0 +1,10 @@
|
|||
import czv
|
||||
|
||||
# Inline CSV sample: a header row followed by three records.
data = """fruits,price
apple,2.50
banana,3.00
strawberry,1.50"""

# Leave the header row out of the count.
output = czv.row_count(data, include_header_row=False)

print(output)
|
||||
15
czv-python/pyproject.toml
Normal file
15
czv-python/pyproject.toml
Normal file
|
|
@ -0,0 +1,15 @@
|
|||
[build-system]
|
||||
requires = ["maturin>=1.6,<2.0"]
|
||||
build-backend = "maturin"
|
||||
|
||||
[project]
|
||||
name = "czv"
|
||||
requires-python = ">=3.8"
|
||||
classifiers = [
|
||||
"Programming Language :: Rust",
|
||||
"Programming Language :: Python :: Implementation :: CPython",
|
||||
"Programming Language :: Python :: Implementation :: PyPy",
|
||||
]
|
||||
dynamic = ["version"]
|
||||
[tool.maturin]
|
||||
features = ["pyo3/extension-module"]
|
||||
2
czv-python/requirements.txt
Normal file
2
czv-python/requirements.txt
Normal file
|
|
@ -0,0 +1,2 @@
|
|||
maturin
|
||||
pytest
|
||||
33
czv-python/src/count.rs
Normal file
33
czv-python/src/count.rs
Normal file
|
|
@ -0,0 +1,33 @@
|
|||
use crate::Result;
|
||||
use csv::ReaderBuilder;
|
||||
use pyo3::pyfunction;
|
||||
|
||||
/// Returns a count of the total number of rows.
|
||||
///
|
||||
/// ## Arguments
|
||||
///
|
||||
/// * `file_data` - CSV file data.
|
||||
/// * `include_header_row` - Specify whether to include the header row (first row) in the row count.
|
||||
#[pyfunction]
|
||||
pub fn row_count(file_data: String, include_header_row: Option<bool>) -> Result<usize> {
|
||||
let mut rdr = ReaderBuilder::new();
|
||||
|
||||
rdr.has_headers(!include_header_row.unwrap_or(false));
|
||||
return Ok(rdr.from_reader(file_data.as_bytes()).records().count());
|
||||
}
|
||||
|
||||
/// Returns a count of the total number of columns (fields).
|
||||
///
|
||||
/// ## Arguments
|
||||
///
|
||||
/// * `file_data` - CSV file data.
|
||||
#[pyfunction]
|
||||
pub fn column_count(file_data: Option<String>) -> Result<usize> {
|
||||
let rdr = ReaderBuilder::new();
|
||||
|
||||
if let Some(file_data) = file_data {
|
||||
return Ok(rdr.from_reader(file_data.as_bytes()).headers()?.len());
|
||||
} else {
|
||||
bail!("Could not determine a file path or file data for column_count_builder.");
|
||||
}
|
||||
}
|
||||
43
czv-python/src/lib.rs
Normal file
43
czv-python/src/lib.rs
Normal file
|
|
@ -0,0 +1,43 @@
|
|||
use pyo3::prelude::*;
|
||||
|
||||
// Error-handling helpers
|
||||
#[derive(thiserror::Error, Debug)]
|
||||
#[error("{0}")]
|
||||
pub struct CzvError(anyhow::Error);
|
||||
|
||||
impl From<pyo3::PyErr> for CzvError {
|
||||
fn from(value: pyo3::PyErr) -> Self {
|
||||
value.into()
|
||||
}
|
||||
}
|
||||
|
||||
impl From<csv::Error> for CzvError {
|
||||
fn from(value: csv::Error) -> Self {
|
||||
value.into()
|
||||
}
|
||||
}
|
||||
|
||||
/// Converts a `CzvError` into the Python exception raised to the caller.
impl From<CzvError> for pyo3::PyErr {
    fn from(value: CzvError) -> Self {
        // `value.into()` would resolve back to this very impl and recurse
        // forever, so the exception is built explicitly instead.
        match value.0.downcast::<pyo3::PyErr>() {
            // The error originated as a Python exception: hand it back unchanged.
            Ok(py_err) => py_err,
            // Anything else is surfaced as a RuntimeError carrying the message.
            Err(other) => pyo3::exceptions::PyRuntimeError::new_err(other.to_string()),
        }
    }
}
|
||||
|
||||
pub type Result<T> = anyhow::Result<T, CzvError>;
|
||||
|
||||
/// Returns early from the enclosing function with a `CzvError`.
///
/// Accepts everything `anyhow::anyhow!` accepts: a message literal, an error
/// value, or a format string followed by its arguments.
#[allow(unused_macros)]
macro_rules! bail {
    ($($arg:tt)+) => {
        return Err(crate::CzvError(anyhow::anyhow!($($arg)+)))
    };
}
|
||||
|
||||
// Command imports
|
||||
pub mod count;
|
||||
|
||||
#[pymodule]
|
||||
fn czv(m: &Bound<'_, PyModule>) -> PyResult<()> {
|
||||
m.add_function(wrap_pyfunction!(count::row_count, m)?)?;
|
||||
m.add_function(wrap_pyfunction!(count::column_count, m)?)?;
|
||||
Ok(())
|
||||
}
|
||||
0
czv-python/tests/__init__.py
Normal file
0
czv-python/tests/__init__.py
Normal file
33972
czv-python/tests/data/constituents_altnames.csv
Normal file
33972
czv-python/tests/data/constituents_altnames.csv
Normal file
File diff suppressed because it is too large
Load diff
4
czv-python/tests/data/fruits.csv
Normal file
4
czv-python/tests/data/fruits.csv
Normal file
|
|
@ -0,0 +1,4 @@
|
|||
fruit,price
|
||||
apple,2.50
|
||||
banana,3.00
|
||||
strawberry,1.50
|
||||
|
24
czv-python/tests/test_count.py
Normal file
24
czv-python/tests/test_count.py
Normal file
|
|
@ -0,0 +1,24 @@
|
|||
import czv
|
||||
import pytest
|
||||
from .test_data import test_data
|
||||
|
||||
class TestCountFunc:
    """Tests for ``czv.row_count`` using the CSV files listed in ``test_data``."""

    @pytest.mark.parametrize(
        "file_name,expected",
        [("fruits.csv", 3), ("constituents_altnames.csv", 33971)],
    )
    def test_count(self, file_name, expected):
        """Count the total number of non-header rows."""
        # Decode explicitly so the result does not depend on the platform's
        # locale encoding (assumes the fixture files are UTF-8).
        csv_text = test_data[file_name].read_text(encoding="utf-8")

        result = czv.row_count(csv_text)
        assert result == expected

    @pytest.mark.parametrize(
        "file_name,expected",
        [("fruits.csv", 4), ("constituents_altnames.csv", 33972)],
    )
    def test_include_header_row(self, file_name, expected):
        """Count the total number of rows including the header row."""
        # Decode explicitly so the result does not depend on the platform's
        # locale encoding (assumes the fixture files are UTF-8).
        csv_text = test_data[file_name].read_text(encoding="utf-8")

        result = czv.row_count(csv_text, include_header_row=True)
        assert result == expected
|
||||
7
czv-python/tests/test_data.py
Normal file
7
czv-python/tests/test_data.py
Normal file
|
|
@ -0,0 +1,7 @@
|
|||
import pathlib
|
||||
|
||||
# Absolute path of the "data" directory that sits next to this module.
data_path = pathlib.Path(__file__).parent.resolve() / "data"

# Maps each fixture file name to its path inside ``data_path``.
test_data = {
    name: data_path / name
    for name in ("fruits.csv", "constituents_altnames.csv")
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue