Refactor
- Break code into comprehensible chunks.
- Create classes and class hierarchies.
- Write docstrings and generating documentation pages.
- Validate implementations against one another.
Terms defined: abstract base class, invasion percolation, random number generation seed
- Goal is to refactor a program that (kind of) works to create something we can do more work with
- Original problem is invasion percolation
- Grid of random numbers
- Fill the center cell
- Repeatedly:
- Find the cell adjacent to the filled region with the lowest value
- Fill it
- Until we reach the edge
- Models spread of pollutant through fractured rock (among other things)
Original Script
- Main body of code
- Always need width, height and depth (range of random integer values)
- Random number seed is optional
# Grid size and range of fill values.
width = int(sys.argv[1])
height = int(sys.argv[2])
depth = int(sys.argv[3])
# Random number generation.
if len(sys.argv) > 4:
seed = int(sys.argv[4])
else:
seed = random.randrange(sys.maxsize)
random.seed(seed)
# Create initial grid
grid = make_grid(width, height, depth)
# Fill central cell.
grid[width // 2][height // 2] = 0
# Fill other cells.
while True:
x, y = choose_cell(grid)
grid[x][y] = 0
if on_border(width, height, x, y):
break
# Show result.
print_grid(grid, width, height, depth, seed)
- Make a grid as a list of lists
- Has a docstring
def make_grid(width, height, depth):
"""Create a width X height grid."""
grid = []
for x in range(width):
row = []
for y in range(height):
row.append(random.randint(1, depth))
grid.append(row)
return grid
- Choose the next cell to fill in by sweeping the whole grid
def choose_cell(grid):
"""Choose the next cell to fill in."""
least, cx, cy = (None, None, None)
for x in range(len(grid)):
row = grid[x]
for y in range(len(row)):
temp = grid[x][y]
if not adjacent(grid, x, y):
continue
if least is None or (temp != 0 and temp < least):
least, cx, cy = (temp, x, y)
return (cx, cy)
- Relies on another function to test adjacency
def adjacent(grid, x, y):
"""Is (x, y) adjacent to a filled cell?"""
x_1, y_1 = (x + 1, y + 1)
if x > 0 and grid[x - 1][y] == 0:
return True
if x_1 < len(grid) and grid[x_1][y] == 0:
return True
if y > 0 and grid[x][y - 1] == 0:
return True
if y_1 < len(grid[x]) and grid[x][y_1] == 0:
return True
return False
- We also need to test if we’re on the border
def on_border(width, height, x, y):
"""Is this cell on the border of the grid?"""
if x == 0 or x == width - 1:
return True
if y == 0 or y == height - 1:
return True
return False
- And finally, show the result
def print_grid(grid, width, height, depth, seed, as_numbers=False):
"""Show the result."""
print(width, height, depth, seed)
height = len(grid[0])
for y in range(height - 1, -1, -1):
for x in range(len(grid)):
if as_numbers:
sys.stdout.write(f'{grid[x][y]:02d} ')
else:
sys.stdout.write('X' if grid[x][y] == 0 else '.')
sys.stdout.write('\n')
Critique
- What if we want to change the way the grid is implemented?
- Or the way we search for the next cell to fill?
- Most meaningful test of software design quality is “how easy is it to make a plausible change?”
A Generic Driver
- Main function
def main():
"""Main driver."""
kind, width, height, depth, seed = setup()
grid = initialize_grid(kind, width, height, depth)
grid.fill()
print_grid(kind, grid, seed)
- Relies on a setup function
- Can easily replace this in future with something that reads parameters from a file
def setup():
"""Get parameters."""
kind = sys.argv[1]
width = int(sys.argv[2])
height = int(sys.argv[3])
depth = int(sys.argv[4])
if len(sys.argv) > 5:
seed = int(sys.argv[5])
else:
seed = random.randrange(sys.maxsize)
random.seed(seed)
return (kind, width, height, depth, seed)
- We’re going to build (at least) two grid classes, so import both here
from grid_list import GridList
from grid_array import GridArray
- Initialization relies on the grid’s constructor
- All grids take the same parameters in the same order
def initialize_grid(kind, width, height, depth):
"""Prepare grid for simulation."""
lookup = {'list': GridList, 'array': GridArray}
return lookup[kind](width, height, depth)
- Keep printing here
- Could have grids print themselves
def print_grid(kind, grid, seed, details='full'):
"""Show the result."""
print(kind, grid.width(), grid.height(), grid.depth(), seed)
if details == 'brief':
return
for y in range(grid.height() - 1, -1, -1):
for x in range(grid.width()):
if details == 'numbers':
sys.stdout.write(f'{grid[x, y]:02d} ')
else:
sys.stdout.write('X' if grid[x, y] == 0 else '.')
sys.stdout.write('\n')
Three Kinds of Grid
- First grid is an abstract base class
- Defines common behaviors
- Requires derived classes to provide a way to get and set item by location
from abc import ABC, abstractmethod
class GridGeneric(ABC):
"""Represent a generic grid."""
@abstractmethod
def __getitem__(self, key):
"""Get value at location."""
@abstractmethod
def __setitem__(self, key, value):
"""Set value at location."""
def __init__(self, width, height, depth):
"""Record shared state."""
self._width = width
self._height = height
self._depth = depth
def width(self):
"""Get width of grid."""
return self._width
def height(self):
"""Get height of grid."""
return self._height
def depth(self):
"""Get depth of grid."""
return self._depth
- Other operations defined in terms of the common methods
- Including the ones the derived classes have to implement
- Filling
def fill(self):
"""Fill grid one cell at a time."""
self[self.width() // 2, self.height() // 2] = 0
while True:
x, y = self.choose_cell()
self[x, y] = 0
if self.on_border(x, y):
break
- Choose the next cell:
def choose_cell(self):
"""Choose the next cell to fill."""
least, cx, cy = (None, None, None)
for x in range(self.width()):
for y in range(self.height()):
temp = self[x, y]
if not self.adjacent(x, y):
continue
if least is None or (temp != 0 and temp < least):
least, cx, cy = (temp, x, y)
return (cx, cy)
- Adjacency test
def adjacent(self, x, y):
"""Is (x, y) adjacent to a filled cell?"""
x_1, y_1 = (x + 1, y + 1)
if x > 0 and self[x - 1, y] == 0:
return True
if x_1 < self.width() and self[x_1, y] == 0:
return True
if y > 0 and self[x, y - 1] == 0:
return True
if y_1 < self.height() and self[x, y_1] == 0:
return True
return False
- Border test
def on_border(self, x, y):
"""Is this cell on the border of the grid?"""
if x == 0 or x == self.width() - 1:
return True
if y == 0 or y == self.height() - 1:
return True
return False
- Now implement a grid that uses list-of-lists like before
class GridList(GridGeneric):
"""Represent grid as list of lists."""
def __init__(self, width, height, depth):
"""Construct and fill."""
super().__init__(width, height, depth)
self._grid = []
for x in range(self._width):
row = []
for y in range(self._height):
row.append(random.randint(1, depth))
self._grid.append(row)
def __getitem__(self, key):
"""Get value at location."""
x, y = key
return self._grid[x][y]
def __setitem__(self, key, value):
"""Set value at location."""
x, y = key
self._grid[x][y] = value
- And another that uses a NumPy array
class GridArray(GridGeneric):
"""Represent grid as NumPy array."""
def __init__(self, width, height, depth):
"""Construct and fill."""
super().__init__(width, height, depth)
self._grid = np.zeros((width, height), dtype=int)
for x in range(self.width()):
for y in range(self.height()):
self[x, y] = random.randint(1, depth)
def __getitem__(self, key):
"""Get value at location."""
return self._grid[*key,]
def __setitem__(self, key, value):
"""Set value at location."""
self._grid[*key,] = value
- Should always get identical answer regardless of which grid is used
- For the same parameters including RNG seed of course
- Allows cross-validation of implementations