Assay

Pollution measurement.

`Assay` `dataclass`

Bases: BaseMixin

A single pollution assay.

Attributes:

Name	Type	Description
`ident`	`str`	unique identifier
`lat`	`float`	latitude where assay performed (from grid cell)
`lon`	`float`	longitude where assay performed (from grid cell)
`person_id`	`str`	who performed assay (from persons)
`machine_id`	`str`	machine used for assay (from machines)
`performed`	`date \| None`	date assay was performed (possibly missing)
`contents`	`str`	string of 'C' (control) and 'T' (treatment) markers, one character per reading
`readings`	`list[float]`	readings, paired position-by-position with the characters of `contents`

Source code in src/snailz/assay.py

@dataclass
class Assay(BaseMixin):
    """
    A single pollution assay.

    Attributes:
        ident: unique identifier (assigned in `__post_init__`)
        lat: latitude where assay performed (from grid cell)
        lon: longitude where assay performed (from grid cell)
        person_id: who performed assay (from persons)
        machine_id: machine used for assay (from machines)
        performed: date assay was performed (possibly missing)
        contents: string of 'C' (control) and 'T' (treatment) markers,
            one character per reading
        readings: readings, paired position-by-position with `contents`
    """

    primary_key: ClassVar[str] = "ident"
    foreign_keys: ClassVar[ForeignKeysType] = [
        ("lat", "grid_cells", "lat"),
        ("lon", "grid_cells", "lon"),
        ("person_id", "person", "ident"),
        ("machine_id", "machine", "ident"),
    ]
    nullable_keys: ClassVar[set[str]] = {"performed"}
    pivot_keys: ClassVar[set[str]] = {"contents", "readings"}
    _next_id: ClassVar[IdGeneratorType] = id_generator("A", 4)

    ident: str = ""
    lat: float = 0.0
    lon: float = 0.0
    person_id: str = ""
    machine_id: str = ""
    performed: date | None = None
    contents: str = ""
    readings: list[float] = field(default_factory=list)

    def __post_init__(self):
        """
        Validate the assay date and assign a unique identifier.

        The date check passes when `performed` is missing (`None`) or is
        later than `date.min`. The identifier is always taken from the
        class-wide generator, so any `ident` passed to the constructor
        is overwritten.
        """
        validate(
            (self.performed is None) or (self.performed > date.min),
            "assay must have sensible date"
        )

        self.ident = next(self._next_id)

    @classmethod
    def make(
        cls, params: Parameters, grids: list[Grid], ratings: list[Rating]
    ) -> list["Assay"]:
        """
        Construct multiple assays.

        Each assay uses a randomly chosen grid, a random cell within that
        grid, and a randomly chosen rating (which supplies the person,
        the machine, and the certification flag).

        Args:
            params: Parameters object.
            grids: Grids that samples are taken from.
            ratings: Proficiencies with machines.

        Returns:
            List of `params.num_assays` assays.
        """

        result = []
        for _ in range(params.num_assays):
            g = random.choice(grids)
            x, y = random.randint(0, g.size - 1), random.randint(0, g.size - 1)
            lat, lon = g.lat_lon(x, y)
            rat = random.choice(ratings)
            performed = random_date(params.start_date, params.end_date, params.p_date_missing)
            contents = cls._random_contents(params)
            # The value stored in the chosen grid cell is the target for
            # treatment readings.
            readings = cls._random_readings(params, contents, g[x, y], rat.certified)
            result.append(
                Assay(
                    lat=lat,
                    lon=lon,
                    person_id=rat.person_id,
                    machine_id=rat.machine_id,
                    performed=performed,
                    contents=contents,
                    readings=readings,
                )
            )
        return result

    @classmethod
    def save_csv(cls, outdir: Path | str, objects: list):
        """
        Save assays as CSV. Scalar properties of all assays are saved in
        one file; assay measurements are pivoted to long form and saved
        in a separate file (`assay_readings.csv`).

        Args:
            outdir: Output directory.
            objects: `Assay` objects to save (may be empty).
        """

        super().save_csv(outdir, objects)

        pivoted = cls._assay_readings(objects)
        # Fixed column list (same keys and order as the dictionaries built
        # by `_assay_readings`) so that an empty `objects` list no longer
        # fails with IndexError on `pivoted[0]`.
        columns = ["assay_id", "reading_id", "contents", "reading"]
        with open(Path(outdir, "assay_readings.csv"), "w", newline="") as stream:
            writer = cls._csv_dict_writer(stream, columns)
            for row in pivoted:
                writer.writerow(row)

    @classmethod
    def save_db(cls, db: Database, objects: list):
        """
        Save assays to database. Scalar properties of all assays are
        saved in one table; assay readings are pivoted to long form
        and saved in a separate table (`assay_readings`).

        Args:
            db: Database connector.
            objects: `Assay` objects to save.
        """

        super().save_db(db, objects)

        table = db["assay_readings"]
        table.insert_all(  # type: ignore[possibly-missing-attribute]
            cls._assay_readings(objects),
            pk=("assay_id", "reading_id"),
            foreign_keys=[("assay_id", "assay", "ident")],
        )

    @classmethod
    def table_name(cls) -> str:
        """Database table name."""

        return "assay"

    @classmethod
    def _assay_readings(cls, assays: list[Self]) -> list[dict[str, str | float]]:
        """
        Get assay readings in long format for persistence.

        Produces one dictionary per (contents character, reading) pair,
        with `reading_id` numbered from 1 within each assay.

        Args:
            assays: Assays to pivot.

        Returns:
            List of persistable dictionaries with keys `assay_id`,
            `reading_id`, `contents`, and `reading`.
        """

        return [
            {"assay_id": a.ident, "reading_id": i + 1, "contents": c, "reading": r}
            for a in assays
            # zip stops at the shorter of the two sequences.
            for i, (c, r) in enumerate(zip(a.contents, a.readings))
        ]

    @classmethod
    def _random_contents(cls, params: Parameters) -> str:
        """
        Generate random control or treatment indicators.

        Args:
            params: Control parameters.

        Returns:
            Shuffled string of 'C' and 'T' characters of length
            `params.assay_size`; when the size is odd the extra
            character is a 'T'.
        """

        num_controls = params.assay_size // 2
        num_treatments = params.assay_size - num_controls
        contents = ["C"] * num_controls + ["T"] * num_treatments
        random.shuffle(contents)
        return "".join(contents)

    @classmethod
    def _random_readings(
        cls, params: Parameters, contents: str, target: float, certified: bool
    ) -> list[float]:
        """
        Generate random readings clustered around target value.

        Noise is drawn from a zero-mean Gaussian with standard deviation
        `params.grid_std_dev` and divided by `params.assay_certified`
        when the person is certified. Treatment readings are the
        absolute value of noise plus target; control readings are the
        absolute value of the noise alone.

        Args:
            params: Control parameters.
            contents: "CT" string showing control or treatment.
            target: Desired mean result.
            certified: Whether person is certified for machine being used.

        Returns:
            List of assay readings, one per character of `contents`,
            rounded to `ASSAY_PRECISION` decimal places.
        """

        scale = params.assay_certified if certified else 1.0
        raw = [random.gauss(0, params.grid_std_dev) / scale for _ in contents]
        return [
            round(abs(r + target) if c == "T" else abs(r), ASSAY_PRECISION)
            for r, c in zip(raw, contents)
        ]

`__post_init__()`

Validate the assay date and generate a unique identifier.

Source code in src/snailz/assay.py

def __post_init__(self):
    """
    Check the assay date and assign a unique identifier.

    A missing date (`None`) is accepted; otherwise the date must be
    later than `date.min`. The identifier is then replaced with the
    next value from the class-wide generator.
    """
    when = self.performed
    date_is_sensible = when is None or when > date.min
    validate(date_is_sensible, "assay must have sensible date")

    self.ident = next(self._next_id)

`make(params, grids, ratings)` `classmethod`

Construct multiple assays.

Parameters:

Name	Type	Description	Default
`params`	`Parameters`	Parameters object.	required
`grids`	`list[Grid]`	Grids that samples are taken from.	required
`ratings`	`list[Rating]`	Proficiencies with machines.	required

Returns:

Type	Description
`list[Assay]`	List of assays.

Source code in src/snailz/assay.py

@classmethod
def make(
    cls, params: Parameters, grids: list[Grid], ratings: list[Rating]
) -> list["Assay"]:
    """
    Build a list of randomly generated assays.

    Every assay is taken from a randomly chosen cell of a randomly
    chosen grid and is attributed to a randomly chosen rating, which
    provides the person, the machine, and the certification flag.

    Args:
        params: Parameters object.
        grids: Grids that samples are taken from.
        ratings: Proficiencies with machines.

    Returns:
        List of `params.num_assays` assays.
    """

    assays = []
    for _ in range(params.num_assays):
        # The random draws below are made in a fixed order; keep it that
        # way so that seeded runs generate the same data.
        grid = random.choice(grids)
        cell_x = random.randint(0, grid.size - 1)
        cell_y = random.randint(0, grid.size - 1)
        lat, lon = grid.lat_lon(cell_x, cell_y)
        rating = random.choice(ratings)
        when = random_date(params.start_date, params.end_date, params.p_date_missing)
        contents = cls._random_contents(params)
        readings = cls._random_readings(
            params, contents, grid[cell_x, cell_y], rating.certified
        )
        assay = Assay(
            lat=lat,
            lon=lon,
            person_id=rating.person_id,
            machine_id=rating.machine_id,
            performed=when,
            contents=contents,
            readings=readings,
        )
        assays.append(assay)
    return assays

`save_csv(outdir, objects)` `classmethod`

Save assays as CSV. Scalar properties of all assays are saved in one file; assay measurements are pivoted to long form and saved in a separate file.

Parameters:

Name	Type	Description	Default
`outdir`	`Path \| str`	Output directory.	required
`objects`	`list`	`Assay` objects to save.	required

Source code in src/snailz/assay.py

@classmethod
def save_csv(cls, outdir: Path | str, objects: list):
    """
    Save assays as CSV. Scalar properties of all assays are saved in
    one file; assay measurements are pivoted to long form and saved
    in a separate file (`assay_readings.csv`).

    Args:
        outdir: Output directory.
        objects: `Assay` objects to save (may be empty).
    """

    super().save_csv(outdir, objects)

    pivoted = cls._assay_readings(objects)
    # Fixed column list (same keys and order as the dictionaries built
    # by `_assay_readings`) so that an empty `objects` list no longer
    # fails with IndexError on `pivoted[0]`.
    columns = ["assay_id", "reading_id", "contents", "reading"]
    with open(Path(outdir, "assay_readings.csv"), "w", newline="") as stream:
        writer = cls._csv_dict_writer(stream, columns)
        for row in pivoted:
            writer.writerow(row)

`save_db(db, objects)` `classmethod`

Save assays to database. Scalar properties of all assays are saved in one table; assay readings are pivoted to long form and saved in a separate table.

Parameters:

Name	Type	Description	Default
`db`	`Database`	Database connector.	required
`objects`	`list`	`Assay` objects to save.	required

Source code in src/snailz/assay.py

@classmethod
def save_db(cls, db: Database, objects: list):
    """
    Persist assays to the database.

    The scalar properties are stored by the parent implementation;
    the readings are pivoted to long form and inserted into the
    separate `assay_readings` table, keyed by assay and reading
    number and linked back to the `assay` table.

    Args:
        db: Database connector.
        objects: `Assay` objects to save.
    """

    super().save_db(db, objects)

    readings_table = db["assay_readings"]
    long_rows = cls._assay_readings(objects)
    composite_key = ("assay_id", "reading_id")
    link_to_assay = [("assay_id", "assay", "ident")]
    readings_table.insert_all(  # type: ignore[possibly-missing-attribute]
        long_rows,
        pk=composite_key,
        foreign_keys=link_to_assay,
    )

`table_name()` `classmethod`

Database table name.

Source code in src/snailz/assay.py

@classmethod
def table_name(cls) -> str:
    """Return the name of the database table used for this class."""

    name = "assay"
    return name

Assay

Assay dataclass

__post_init__()

make(params, grids, ratings) classmethod

save_csv(outdir, objects) classmethod

save_db(db, objects) classmethod

table_name() classmethod

`Assay` `dataclass`

`__post_init__()`

`make(params, grids, ratings)` `classmethod`

`save_csv(outdir, objects)` `classmethod`

`save_db(db, objects)` `classmethod`

`table_name()` `classmethod`