Skip to content

Parameters

Data generation parameters.

Parameters dataclass

Store all data generation parameters.

Source code in src/snailz/parameters.py
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
@dataclass
class Parameters:
    """
    Store all data generation parameters.

    Every field has a default value, and all fields are checked in
    `__post_init__` immediately after construction.
    """

    seed: int = 12345
    """Random number generator seed for reproducible data generation."""

    num_grids: int = 1
    """Number of survey grids to create."""

    grid_size: int = 1
    """Width and height of each survey grid in cells."""

    grid_spacing: float = 10.0
    """Size of each grid cell in meters."""

    grid_separation: int = 4
    """Minimum separation between grid origin points as a multiple of the total grid size."""

    grid_std_dev: float = 0.5
    """Standard deviation of noise applied to grid pollution values."""

    lat0: float = 48.8666632
    """Reference latitude for all grids."""

    lon0: float = -124.1999992
    """Reference longitude for all grids."""

    num_persons: int = 1
    """Number of persons to generate."""

    supervisor_frac: float = 0.3
    """Fraction of persons who are supervisors of other persons (must lie in [0..1])."""

    locale: str = "et_EE"
    """Locale for generating personal and family names of persons."""

    num_machines: int = 1
    """Number of machines to generate."""

    ratings_frac: float = 0.5
    """Fraction of (person, machine) pairs to be given ratings (must lie in [0..1])."""

    p_certified: float = 0.3
    """Probability that a particular person is certified for a particular machine (must lie in [0..1])."""

    num_assays: int = 1
    """Number of soil pollution assays to generate."""

    assay_size: int = 2
    """Number of control or treatment values to include in each assay."""

    assay_certified: float = 3.0
    """How much to narrow the standard deviation of assay pollution if the operator is certified."""

    genome_length: int = 1
    """Length of species genome in bases."""

    num_loci: int = 1
    """Number of loci in genome at which mutations may occur."""

    p_mutation: float = 0.5
    """Probability of mutation at each locus (must lie in [0..1])."""

    num_specimens: int = 1
    """Number of snail specimens to create."""

    mass_beta_0: float = 3.0
    """Fixed mean for log-normal snail mass generation."""

    mass_beta_1: float = 0.5
    """Scaling factor for pollution in mean of log-normal snail mass generation."""

    mass_sigma: float = 0.3
    """Standard deviation in log-normal generation of snail mass."""

    diam_ratio: float = 0.7
    """Mean of the ratio of snail diameter to mass."""

    diam_sigma: float = 0.7
    """Standard deviation in snail diameter generation."""

    start_date: date = date(2026, 3, 1)
    """Start date of survey."""

    end_date: date = date(2026, 5, 31)
    """End date of survey."""

    def __post_init__(self):
        """
        Validate fields.

        Each check is delegated to `validate` or `validate_lat_lon`, which
        are defined outside this class; how a failed check is reported is
        determined by those helpers.
        """

        validate(self.num_grids > 0, "require positive number of grids")
        validate(self.grid_size > 0, "require positive grid size")
        validate(self.grid_spacing > 0, "require positive grid spacing")
        validate_lat_lon("parameters", self.lat0, self.lon0)
        validate(self.num_persons > 0, "require positive number of persons")
        validate(
            self.supervisor_frac >= 0.0, "require non-negative supervisor fraction"
        )
        # A fraction of persons cannot exceed one; checked separately so the
        # message for the negative case stays unchanged.
        validate(
            self.supervisor_frac <= 1.0, "require supervisor fraction at most one"
        )
        validate(self.locale in AVAILABLE_LOCALES, f"unknown locale {self.locale}")
        validate(self.num_machines > 0, "require positive number of machines")
        validate(0.0 <= self.ratings_frac <= 1.0, "require ratings fraction in [0..1]")
        # p_certified is documented as a probability, so bound it like the
        # other probabilities and fractions.
        validate(
            0.0 <= self.p_certified <= 1.0,
            "require certification probability in [0..1]",
        )
        validate(self.num_assays >= 1, "require at least one assay")
        validate(self.assay_size >= 2, "require assay size at least two")
        validate(self.genome_length > 0, "require positive genome length")
        validate(self.num_loci >= 0, "require non-negative number of loci")
        validate(
            0.0 <= self.p_mutation <= 1.0, "require mutation probability in [0..1]"
        )
        validate(self.num_specimens > 0, "require positive number of specimens")
        validate(
            self.start_date <= self.end_date, "require non-negative survey date range"
        )

    def as_json(self, indent: int = JSON_INDENT) -> str:
        """
        Convert parameters to a JSON string.

        Values that `json` cannot encode natively are passed to
        `_serialize_json`.

        Args:
            indent: Indentation.

        Returns:
            JSON string representation of persistable fields.
        """
        return json.dumps(vars(self), indent=indent, default=_serialize_json)

seed = 12345 class-attribute instance-attribute

Random number generator seed for reproducible data generation.

num_grids = 1 class-attribute instance-attribute

Number of survey grids to create.

grid_size = 1 class-attribute instance-attribute

Width and height of each survey grid in cells.

grid_spacing = 10.0 class-attribute instance-attribute

Size of each grid cell in meters.

grid_separation = 4 class-attribute instance-attribute

Minimum separation between grid origin points as a multiple of the total grid size.

grid_std_dev = 0.5 class-attribute instance-attribute

Standard deviation of noise applied to grid pollution values.

lat0 = 48.8666632 class-attribute instance-attribute

Reference latitude for all grids.

lon0 = -124.1999992 class-attribute instance-attribute

Reference longitude for all grids.

num_persons = 1 class-attribute instance-attribute

Number of persons to generate.

supervisor_frac = 0.3 class-attribute instance-attribute

Fraction of persons who are supervisors of other persons.

locale = 'et_EE' class-attribute instance-attribute

Locale for generating personal and family names of persons.

num_machines = 1 class-attribute instance-attribute

Number of machines to generate.

ratings_frac = 0.5 class-attribute instance-attribute

Fraction of (person, machine) pairs to be given ratings.

p_certified = 0.3 class-attribute instance-attribute

Probability that a particular person is certified for a particular machine.

num_assays = 1 class-attribute instance-attribute

Number of soil pollution assays to generate.

assay_size = 2 class-attribute instance-attribute

Number of control or treatment values to include in each assay.

assay_certified = 3.0 class-attribute instance-attribute

How much to narrow the standard deviation of assay pollution if the operator is certified.

genome_length = 1 class-attribute instance-attribute

Length of species genome in bases.

num_loci = 1 class-attribute instance-attribute

Number of loci in genome at which mutations may occur.

p_mutation = 0.5 class-attribute instance-attribute

Probability of mutation at each locus.

num_specimens = 1 class-attribute instance-attribute

Number of snail specimens to create.

mass_beta_0 = 3.0 class-attribute instance-attribute

Fixed mean for log-normal snail mass generation.

mass_beta_1 = 0.5 class-attribute instance-attribute

Scaling factor for pollution in mean of log-normal snail mass generation.

mass_sigma = 0.3 class-attribute instance-attribute

Standard deviation in log-normal generation of snail mass.

diam_ratio = 0.7 class-attribute instance-attribute

Mean of the ratio of snail diameter to mass.

diam_sigma = 0.7 class-attribute instance-attribute

Standard deviation in snail diameter generation.

start_date = date(2026, 3, 1) class-attribute instance-attribute

Start date of survey.

end_date = date(2026, 5, 31) class-attribute instance-attribute

End date of survey.

__post_init__()

Validate fields.

Source code in src/snailz/parameters.py
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
def __post_init__(self):
    """
    Validate fields.

    Each check is delegated to `validate` or `validate_lat_lon`, which
    are defined outside this method; how a failed check is reported is
    determined by those helpers.
    """

    validate(self.num_grids > 0, "require positive number of grids")
    validate(self.grid_size > 0, "require positive grid size")
    validate(self.grid_spacing > 0, "require positive grid spacing")
    validate_lat_lon("parameters", self.lat0, self.lon0)
    validate(self.num_persons > 0, "require positive number of persons")
    validate(
        self.supervisor_frac >= 0.0, "require non-negative supervisor fraction"
    )
    # A fraction of persons cannot exceed one; checked separately so the
    # message for the negative case stays unchanged.
    validate(
        self.supervisor_frac <= 1.0, "require supervisor fraction at most one"
    )
    validate(self.locale in AVAILABLE_LOCALES, f"unknown locale {self.locale}")
    validate(self.num_machines > 0, "require positive number of machines")
    validate(0.0 <= self.ratings_frac <= 1.0, "require ratings fraction in [0..1]")
    # p_certified is a probability, so bound it like the other
    # probabilities and fractions.
    validate(
        0.0 <= self.p_certified <= 1.0,
        "require certification probability in [0..1]",
    )
    validate(self.num_assays >= 1, "require at least one assay")
    validate(self.assay_size >= 2, "require assay size at least two")
    validate(self.genome_length > 0, "require positive genome length")
    validate(self.num_loci >= 0, "require non-negative number of loci")
    validate(
        0.0 <= self.p_mutation <= 1.0, "require mutation probability in [0..1]"
    )
    validate(self.num_specimens > 0, "require positive number of specimens")
    validate(
        self.start_date <= self.end_date, "require non-negative survey date range"
    )

as_json(indent=JSON_INDENT)

Convert parameters to a JSON string.

Parameters:

Name Type Description Default
indent int

Indentation.

JSON_INDENT

Returns:

Type Description
str

JSON string representation of persistable fields.

Source code in src/snailz/parameters.py
132
133
134
135
136
137
138
139
140
141
142
def as_json(self, indent: int = JSON_INDENT) -> str:
    """
    Serialize this object's instance attributes as JSON text.

    Values that `json` cannot encode natively are passed to
    `_serialize_json`.

    Args:
        indent: Indentation passed through to `json.dumps`.

    Returns:
        JSON string representation of persistable fields.
    """
    field_values = vars(self)
    return json.dumps(field_values, default=_serialize_json, indent=indent)