Mercurial > hg > GlobalNeighbors
annotate tests/test_read.py @ 0:5dba84370182
initial commit; half-working prototype
| field | value |
|---|---|
| author | Jeff Hammel <k0scist@gmail.com> |
| date | Sat, 24 Jun 2017 12:03:39 -0700 |
| parents | (none) |
| children | 1b94f3bf97e5 |
| rev | line | source |
|---|---|---|
| 0:5dba84370182 | 1 | `#!/usr/bin/env python` |
| 0:5dba84370182 | 2 | |
| 0:5dba84370182 | 3 | `"""` |
| 0:5dba84370182 | 4 | `test data reading + loading` |
| 0:5dba84370182 | 5 | `"""` |
| 0:5dba84370182 | 6 | |
| 0:5dba84370182 | 7 | `import os` |
| 0:5dba84370182 | 8 | `import unittest` |
| 0:5dba84370182 | 9 | `from globalneighbors import schema` |
| 0:5dba84370182 | 10 | `from globalneighbors.read import read_tsv` |
| 0:5dba84370182 | 11 | |
| 0:5dba84370182 | 12 | |
| 0:5dba84370182 | 13 | `here = os.path.dirname(os.path.abspath(__file__))` |
| 0:5dba84370182 | 14 | `data = os.path.join(here, 'data')` |
| 0:5dba84370182 | 15 | |
| 0:5dba84370182 | 16 | `class TestDataRead(unittest.TestCase):` |
| 0:5dba84370182 | 17 | |
| 0:5dba84370182 | 18 | `    # created with` |
| 0:5dba84370182 | 19 | `    # head -n 10 cities1000.txt > GlobalNeighbors/tests/data/sample.tsv` |
| 0:5dba84370182 | 20 | `    test_tsv = os.path.join(data, 'sample.tsv')` |
| 0:5dba84370182 | 21 | `    test_tsv_lines = 10` |
| 0:5dba84370182 | 22 | |
| 0:5dba84370182 | 23 | `    # full dataset: test with caution` |
| 0:5dba84370182 | 24 | `    full_tsv = os.path.join(data, 'cities1000.txt')` |
| 0:5dba84370182 | 25 | `    full_tsv_lines = 149092` |
| 0:5dba84370182 | 26 | |
| 0:5dba84370182 | 27 | `    def test_read_tsv(self):` |
| 0:5dba84370182 | 28 | `        """test reading a tsv file chunk"""` |
| 0:5dba84370182 | 29 | |
| 0:5dba84370182 | 30 | `        assert os.path.isfile(self.test_tsv)` |
| 0:5dba84370182 | 31 | `        sample = read_tsv(self.test_tsv)` |
| 0:5dba84370182 | 32 | |
| 0:5dba84370182 | 33 | `        assert len(sample) == 10` |
| 0:5dba84370182 | 34 | `        for row in sample:` |
| 0:5dba84370182 | 35 | `            assert len(row) == len(schema.descriptions)` |
| 0:5dba84370182 | 36 | |
| 0:5dba84370182 | 37 | `    def test_full_dataset(self):` |
| 0:5dba84370182 | 38 | `        """ensure we can operate on the full dataset"""` |
| 0:5dba84370182 | 39 | |
| 0:5dba84370182 | 40 | `        assert os.path.isfile(self.full_tsv)` |
| 0:5dba84370182 | 41 | `        cities = read_tsv(self.full_tsv)` |
| 0:5dba84370182 | 42 | `        assert len(cities) == self.full_tsv_lines` |
| 0:5dba84370182 | 43 | `        for row in cities:` |
| 0:5dba84370182 | 44 | `            assert len(row) == len(schema.descriptions)` |
| 0:5dba84370182 | 45 | |
| 0:5dba84370182 | 46 | `        # cast the data into types we want` |
| 0:5dba84370182 | 47 | `        for row in cities:` |
| 0:5dba84370182 | 48 | `            row = schema.cast_row(row, types=schema.types)` |
| 0:5dba84370182 | 49 | |
| 0:5dba84370182 | 50 | |
| 0:5dba84370182 | 51 | `if __name__ == '__main__':` |
| 0:5dba84370182 | 52 | `    unittest.main()` |
