chore: improve postal data import observability
Add progress logging and a status script for postal imports and neighbor builds, and ignore local raw and generated postal datasets.
This commit is contained in:
@@ -1,4 +1,8 @@
|
||||
import { readFile } from 'node:fs/promises';
|
||||
import { createReadStream } from 'node:fs';
|
||||
import { chain } from 'stream-chain';
|
||||
import { parser } from 'stream-json';
|
||||
import { pick } from 'stream-json/filters/pick';
|
||||
import { streamArray } from 'stream-json/streamers/stream-array';
|
||||
|
||||
export type FeatureGeometry = {
|
||||
type: 'Polygon' | 'MultiPolygon';
|
||||
@@ -24,15 +28,49 @@ export type PostalDatasetConfig = {
|
||||
displayNameKeys: string[];
|
||||
};
|
||||
|
||||
export async function readFeatureCollection(filePath: string) {
|
||||
const raw = await readFile(filePath, 'utf8');
|
||||
const parsed = JSON.parse(raw) as GeoJsonFeatureCollection;
|
||||
|
||||
if (parsed.type !== 'FeatureCollection' || !Array.isArray(parsed.features)) {
|
||||
throw new Error(`Dataset at ${filePath} is not a valid GeoJSON FeatureCollection.`);
|
||||
/**
 * Depth-first search for the first `[x, y]` pair of finite numbers inside an
 * arbitrarily nested coordinates value.
 *
 * @param coordinates - Any value; anything that is not an array yields `null`.
 * @returns The first two entries of the first array (in depth-first order)
 *   whose leading two entries are both finite numbers, or `null` when no such
 *   array exists.
 */
function findFirstCoordinatePair(coordinates: unknown): [number, number] | null {
  if (!Array.isArray(coordinates)) {
    return null;
  }

  // Pull the two leading entries out as `unknown` so the typeof checks below
  // narrow real local bindings; a missing entry is `undefined` and fails them.
  const [first, second]: unknown[] = coordinates;

  if (
    typeof first === 'number' &&
    Number.isFinite(first) &&
    typeof second === 'number' &&
    Number.isFinite(second)
  ) {
    return [first, second];
  }

  // This level is not a usable pair: descend into each entry in order and
  // return the first pair found deeper down.
  for (const nested of coordinates) {
    const pair = findFirstCoordinatePair(nested);

    if (pair) {
      return pair;
    }
  }

  return null;
}
|
||||
|
||||
/**
 * Lazily yields the features of a GeoJSON file without loading the whole
 * document into memory.
 *
 * The file is read as UTF-8 text and piped through a streaming JSON parser;
 * the `features` member is picked out and its array items are yielded one at
 * a time.
 *
 * @param filePath - Path of the GeoJSON file to read.
 * @throws Error when the stream finishes without producing a single feature.
 *   Note that this also covers a `features` array that is present but empty.
 */
export async function* streamFeatureCollection(filePath: string) {
  const fileStream = createReadStream(filePath, { encoding: 'utf8' });

  const featureStream = chain([
    fileStream,
    parser(),
    pick({ filter: 'features' }) as any,
    streamArray() as any,
  ]) as AsyncIterable<{ value: GeoJsonFeature }>;

  // Count emitted items so an input that produced nothing can be rejected.
  let emittedCount = 0;

  for await (const { value } of featureStream) {
    emittedCount += 1;
    yield value as GeoJsonFeature;
  }

  if (emittedCount === 0) {
    throw new Error(`Dataset at ${filePath} is not a valid GeoJSON FeatureCollection with a features array.`);
  }
}
|
||||
|
||||
export function normalizePostalCode(countryCode: 'US' | 'CA', rawPostalCode: string) {
|
||||
@@ -74,3 +112,19 @@ export function getFeatureGeometry(feature: GeoJsonFeature, filePath: string, in
|
||||
|
||||
return feature.geometry;
|
||||
}
|
||||
|
||||
/**
 * Sanity-checks that a geometry looks like WGS84 longitude/latitude data.
 *
 * Only the first coordinate pair found in the geometry is inspected: it must
 * exist, its first value must lie within ±180 and its second within ±90.
 *
 * @param geometry - Geometry whose `coordinates` are inspected.
 * @param filePath - Source file path, used only in error messages.
 * @param index - Feature index, used only in error messages.
 * @throws Error when no coordinate pair can be found, or when the first pair
 *   falls outside the longitude/latitude range.
 */
export function assertGeometryIsWgs84(geometry: FeatureGeometry, filePath: string, index: number) {
  const firstPair = findFirstCoordinatePair(geometry.coordinates);

  if (!firstPair) {
    throw new Error(`Feature ${index} in ${filePath} does not contain readable coordinates.`);
  }

  const longitude = firstPair[0];
  const latitude = firstPair[1];
  const outOfRange = Math.abs(longitude) > 180 || Math.abs(latitude) > 90;

  if (!outOfRange) {
    return;
  }

  // Values beyond the lon/lat range suggest a different (e.g. projected) CRS.
  throw new Error(
    `Feature ${index} in ${filePath} is not in WGS84 lon/lat coordinates. Re-export the dataset to GeoJSON with EPSG:4326, for example with ogr2ogr -t_srs EPSG:4326.`,
  );
}
|
||||
|
||||
Reference in New Issue
Block a user