Skip to content

Assay

Pollution measurement.

Assay dataclass

Bases: BaseMixin

A single pollution assay.

Attributes:

Name Type Description
ident str

unique identifier

lat float

latitude where assay performed (from grid cell)

lon float

longitude where assay performed (from grid cell)

person_id str

who performed assay (from persons)

machine_id str

machine used for assay (from machines)

performed date

date assay was performed

contents str

'C' for control or 'T' for treatment

readings list[float]

readings for contents

Source code in src/snailz/assay.py
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
@dataclass
class Assay(BaseMixin):
    """
    A single pollution assay.

    Attributes:
        ident: unique identifier (always regenerated in `__post_init__`)
        lat: latitude where assay performed (from grid cell)
        lon: longitude where assay performed (from grid cell)
        person_id: who performed assay (from persons)
        machine_id: machine used for assay (from machines)
        performed: date assay was performed
        contents: 'C' for control or 'T' for treatment, one character per reading
        readings: readings for contents
    """

    primary_key: ClassVar[str] = "ident"
    # NOTE(review): tuples look like (column, referenced table, referenced
    # column), matching the shape passed to `insert_all` in `save_db` --
    # confirm against BaseMixin.
    foreign_keys: ClassVar[ForeignKeysType] = [
        ("lat", "grid_cells", "lat"),
        ("lon", "grid_cells", "lon"),
        ("person_id", "person", "ident"),
        ("machine_id", "machine", "ident"),
    ]
    # NOTE(review): presumably the fields BaseMixin leaves out of the scalar
    # table because they are persisted in long form by this class -- confirm.
    pivot_keys: ClassVar[set[str]] = {"contents", "readings"}
    # Class-wide generator shared by every instance.
    _next_id: ClassVar[IdGeneratorType] = id_generator("A", 4)

    ident: str = ""
    lat: float = 0.0
    lon: float = 0.0
    person_id: str = ""
    machine_id: str = ""
    performed: date = date.min
    contents: str = ""
    readings: list[float] = field(default_factory=list)

    def __post_init__(self):
        """
        Generate unique identifier.

        The identifier is always drawn from the class-wide generator, so
        any `ident` passed to the constructor is overwritten.
        """

        self.ident = next(self._next_id)

    @classmethod
    def make(
        cls, params: Parameters, grids: list[Grid], ratings: list[Rating]
    ) -> list["Assay"]:
        """
        Construct multiple assays.

        Each assay picks a random grid, a random cell in that grid, a
        random rating (which supplies the person, the machine, and whether
        the person is certified), a random date, and random readings whose
        target is the value of the chosen grid cell.

        Args:
            params: Parameters object.
            grids: Grids that samples are taken from.
            ratings: Proficiencies with machines.

        Returns:
            List of `params.num_assays` assays.
        """

        result = []
        for _ in range(params.num_assays):
            g = random.choice(grids)
            x, y = random.randint(0, g.size - 1), random.randint(0, g.size - 1)
            lat, lon = g.lat_lon(x, y)
            rat = random.choice(ratings)
            performed = random_date(params.start_date, params.end_date)
            contents = cls._random_contents(params)
            readings = cls._random_readings(params, contents, g[x, y], rat.certified)
            result.append(
                Assay(
                    lat=lat,
                    lon=lon,
                    person_id=rat.person_id,
                    machine_id=rat.machine_id,
                    performed=performed,
                    contents=contents,
                    readings=readings,
                )
            )
        return result

    @classmethod
    def save_csv(cls, outdir: Path | str, objects: list):
        """
        Save assays as CSV. Scalar properties of all assays are saved in
        one file; assay measurements are pivoted to long form and saved
        in a separate file (`assay_readings.csv`).

        Args:
            outdir: Output directory.
            objects: `Assay` objects to save. May be empty.
        """

        super().save_csv(outdir, objects)

        # Pivot before opening the file so a failure here cannot leave a
        # truncated output file behind.
        pivoted = cls._assay_readings(objects)

        # With no readings there is no first row to take column names from,
        # so fall back to the keys `_assay_readings` always produces.
        if pivoted:
            fieldnames = list(pivoted[0].keys())
        else:
            fieldnames = ["assay_id", "reading_id", "contents", "reading"]

        with open(Path(outdir, "assay_readings.csv"), "w", newline="") as stream:
            writer = cls._csv_dict_writer(stream, fieldnames)
            for obj in pivoted:
                writer.writerow(obj)

    @classmethod
    def save_db(cls, db: Database, objects: list):
        """
        Save assays to database. Scalar properties of all assays are
        saved in one table; assay readings are pivoted to long form
        and saved in a separate table (`assay_readings`) keyed by
        (`assay_id`, `reading_id`) with `assay_id` referencing
        `assay.ident`.

        Args:
            db: Database connector.
            objects: `Assay` objects to save.
        """

        super().save_db(db, objects)

        table = db["assay_readings"]
        table.insert_all(  # type: ignore[possibly-missing-attribute]
            cls._assay_readings(objects),
            pk=("assay_id", "reading_id"),
            foreign_keys=[("assay_id", "assay", "ident")],
        )

    @classmethod
    def table_name(cls) -> str:
        """Database table name."""

        return "assay"

    @classmethod
    def _assay_readings(cls, assays: list[Self]) -> list[dict[str, str | float]]:
        """
        Get assay readings in long format for persistence.

        Each character of an assay's `contents` is paired with the reading
        at the same position; `reading_id` counts from 1 within each assay.
        Pairing stops at the shorter of `contents` and `readings`.

        Args:
            assays: Assays to pivot.

        Returns:
            List of persistable dictionaries with keys `assay_id`,
            `reading_id`, `contents`, and `reading`.
        """

        return [
            {"assay_id": a.ident, "reading_id": i + 1, "contents": c, "reading": r}
            for a in assays
            for i, (c, r) in enumerate(zip(a.contents, a.readings))
        ]

    @classmethod
    def _random_contents(cls, params: Parameters) -> str:
        """
        Generate random control or treatment indicators.

        Half of the `params.assay_size` positions (rounded down) are
        controls and the rest are treatments; their order is shuffled.

        Args:
            params: Control parameters.

        Returns:
            String of "C" and "T" characters of length `params.assay_size`.
        """

        num_controls = params.assay_size // 2
        # Treatments receive the extra position when assay_size is odd.
        num_treatments = params.assay_size - num_controls
        contents = ["C"] * num_controls + ["T"] * num_treatments
        random.shuffle(contents)
        return "".join(contents)

    @classmethod
    def _random_readings(
        cls, params: Parameters, contents: str, target: float, certified: bool
    ) -> list[float]:
        """
        Generate random readings: treatments are clustered around the
        target value and controls around zero.

        Args:
            params: Control parameters.
            contents: "CT" string showing control or treatment.
            target: Desired mean result for treatment readings.
            certified: Whether person is certified for machine being used.

        Returns:
            List of non-negative assay readings, one per character of
            `contents`, rounded to `ASSAY_PRECISION` digits.
        """

        # Noise is divided by `params.assay_certified` for certified people
        # and left unscaled otherwise.
        scale = params.assay_certified if certified else 1.0
        raw = [random.gauss(0, params.grid_std_dev) / scale for _ in contents]
        return [
            # Only treatment readings are offset by the target; abs() keeps
            # every reading non-negative.
            round(abs(r + target) if c == "T" else abs(r), ASSAY_PRECISION)
            for r, c in zip(raw, contents)
        ]

__post_init__()

Generate unique identifier.

Source code in src/snailz/assay.py
55
56
57
58
59
60
def __post_init__(self):
    """
    Assign a freshly generated identifier to this object.

    The value is pulled from the `_next_id` generator reachable through
    `self` and replaces whatever `ident` currently holds.
    """

    fresh_ident = next(self._next_id)
    self.ident = fresh_ident

make(params, grids, ratings) classmethod

Construct multiple assays.

Parameters:

Name Type Description Default
params Parameters

Parameters object.

required
grids list[Grid]

Grids that samples are taken from.

required
ratings list[Rating]

Proficiencies with machines.

required

Returns:

Type Description
list[Assay]

List of assays.

Source code in src/snailz/assay.py
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
@classmethod
def make(
    cls, params: Parameters, grids: list[Grid], ratings: list[Rating]
) -> list["Assay"]:
    """
    Build `params.num_assays` randomly generated assays.

    For every assay a grid, a cell within that grid, and a rating are
    chosen at random; the rating supplies the person, the machine, and
    the certification flag, and the chosen cell's value is the target
    for the generated readings.

    Args:
        params: Parameters object.
        grids: Grids that samples are taken from.
        ratings: Proficiencies with machines.

    Returns:
        List of assays.
    """

    assays = []
    for _ in range(params.num_assays):
        # Location: one grid, then one cell inside it.
        grid = random.choice(grids)
        col = random.randint(0, grid.size - 1)
        row = random.randint(0, grid.size - 1)
        lat, lon = grid.lat_lon(col, row)

        # Who did it, on what machine, and when.
        rating = random.choice(ratings)
        when = random_date(params.start_date, params.end_date)

        # Control/treatment layout and the matching measurements.
        layout = cls._random_contents(params)
        values = cls._random_readings(
            params, layout, grid[col, row], rating.certified
        )

        assay = Assay(
            lat=lat,
            lon=lon,
            person_id=rating.person_id,
            machine_id=rating.machine_id,
            performed=when,
            contents=layout,
            readings=values,
        )
        assays.append(assay)
    return assays

save_csv(outdir, objects) classmethod

Save assays as CSV. Scalar properties of all assays are saved in one file; assay measurements are pivoted to long form and saved in a separate file.

Parameters:

Name Type Description Default
outdir Path | str

Output directory.

required
objects list

Assay objects to save.

required
Source code in src/snailz/assay.py
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
@classmethod
def save_csv(cls, outdir: Path | str, objects: list):
    """
    Save assays as CSV. Scalar properties of all assays are saved in
    one file; assay measurements are pivoted to long form and saved
    in a separate file (`assay_readings.csv`).

    Args:
        outdir: Output directory.
        objects: `Assay` objects to save. May be empty.
    """

    super().save_csv(outdir, objects)

    # Pivot before opening the file so a failure here cannot leave a
    # truncated output file behind.
    pivoted = cls._assay_readings(objects)

    # With no readings there is no first row to take column names from,
    # so fall back to the column names used for the long-form rows.
    if pivoted:
        fieldnames = list(pivoted[0].keys())
    else:
        fieldnames = ["assay_id", "reading_id", "contents", "reading"]

    with open(Path(outdir, "assay_readings.csv"), "w", newline="") as stream:
        writer = cls._csv_dict_writer(stream, fieldnames)
        for obj in pivoted:
            writer.writerow(obj)

save_db(db, objects) classmethod

Save assays to database. Scalar properties of all assays are saved in one table; assay readings are pivoted to long form and saved in a separate table.

Parameters:

Name Type Description Default
db Database

Database connector.

required
objects list

Assay objects to save.

required
Source code in src/snailz/assay.py
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
@classmethod
def save_db(cls, db: Database, objects: list):
    """
    Persist assays to the database.

    The parent implementation stores the assays first; the readings are
    then pivoted to long form and inserted into the `assay_readings`
    table, keyed by (`assay_id`, `reading_id`) with `assay_id`
    referencing `assay.ident`.

    Args:
        db: Database connector.
        objects: `Assay` objects to save.
    """

    super().save_db(db, objects)

    readings_table = db["assay_readings"]
    long_rows = cls._assay_readings(objects)
    key_columns = ("assay_id", "reading_id")
    links = [("assay_id", "assay", "ident")]
    readings_table.insert_all(  # type: ignore[possibly-missing-attribute]
        long_rows,
        pk=key_columns,
        foreign_keys=links,
    )

table_name() classmethod

Database table name.

Source code in src/snailz/assay.py
141
142
143
144
145
@classmethod
def table_name(cls) -> str:
    """Return the name of the database table that holds assays."""

    name = "assay"
    return name