Public Access
1
0

chore: improve postal data import observability

Add progress logging and a status script for postal imports and neighbor builds, and ignore local raw and generated postal datasets.
This commit is contained in:
pguerrerox
2026-04-12 23:22:36 +00:00
parent cc00a439bf
commit dc7686f507
11 changed files with 267 additions and 23 deletions
+62 -8
View File
@@ -1,4 +1,8 @@
import { readFile } from 'node:fs/promises';
import { createReadStream } from 'node:fs';
import { chain } from 'stream-chain';
import { parser } from 'stream-json';
import { pick } from 'stream-json/filters/pick';
import { streamArray } from 'stream-json/streamers/stream-array';
export type FeatureGeometry = {
type: 'Polygon' | 'MultiPolygon';
@@ -24,15 +28,49 @@ export type PostalDatasetConfig = {
displayNameKeys: string[];
};
export async function readFeatureCollection(filePath: string) {
const raw = await readFile(filePath, 'utf8');
const parsed = JSON.parse(raw) as GeoJsonFeatureCollection;
if (parsed.type !== 'FeatureCollection' || !Array.isArray(parsed.features)) {
throw new Error(`Dataset at ${filePath} is not a valid GeoJSON FeatureCollection.`);
/**
 * Searches a nested coordinates value depth-first for the first position.
 *
 * A position is any array whose first two entries are finite numbers; extra
 * entries (such as a third value) are ignored.
 *
 * @param coordinates - Arbitrarily nested arrays. Any non-array value yields `null`.
 * @returns The first two numbers of the first matching array, or `null` when
 *   no such array exists anywhere in the structure.
 */
function findFirstCoordinatePair(coordinates: unknown): [number, number] | null {
  if (!Array.isArray(coordinates)) {
    return null;
  }

  // This level is itself a position: stop descending.
  if (
    coordinates.length >= 2 &&
    typeof coordinates[0] === 'number' &&
    Number.isFinite(coordinates[0]) &&
    typeof coordinates[1] === 'number' &&
    Number.isFinite(coordinates[1])
  ) {
    return [coordinates[0], coordinates[1]];
  }

  // Otherwise look inside each child, returning the first hit in document order.
  for (const value of coordinates) {
    const pair = findFirstCoordinatePair(value);
    if (pair) {
      return pair;
    }
  }

  return null;
}
/**
 * Lazily yields the elements of the top-level `features` array of a JSON file,
 * one at a time, by streaming the file through a token parser instead of
 * loading and parsing it in a single step.
 *
 * @param filePath - Path of the dataset file to read (decoded as UTF-8).
 * @throws Error when the stream finishes without producing a single feature.
 *   Note this also covers a file whose `features` array is present but empty.
 */
export async function* streamFeatureCollection(filePath: string) {
  // Stages: file text -> JSON tokens -> only the `features` subtree -> one chunk per array element.
  const featureChunks = chain([
    createReadStream(filePath, { encoding: 'utf8' }),
    parser(),
    pick({ filter: 'features' }) as any,
    streamArray() as any,
  ]) as AsyncIterable<{ value: GeoJsonFeature }>;

  let emittedCount = 0;

  for await (const { value } of featureChunks) {
    emittedCount += 1;
    yield value;
  }

  if (emittedCount === 0) {
    throw new Error(`Dataset at ${filePath} is not a valid GeoJSON FeatureCollection with a features array.`);
  }
}
export function normalizePostalCode(countryCode: 'US' | 'CA', rawPostalCode: string) {
@@ -74,3 +112,19 @@ export function getFeatureGeometry(feature: GeoJsonFeature, filePath: string, in
return feature.geometry;
}
/**
 * Sanity-checks that a geometry appears to use lon/lat degrees by inspecting
 * the first coordinate pair found in it.
 *
 * Only that first pair is examined, so this is a cheap heuristic rather than
 * a full validation of every position.
 *
 * @param geometry - Geometry whose `coordinates` are searched for a pair.
 * @param filePath - Dataset path, used only in error messages.
 * @param index - Feature index within the dataset, used only in error messages.
 * @throws Error when no coordinate pair can be found, or when the pair lies
 *   outside longitude [-180, 180] / latitude [-90, 90].
 */
export function assertGeometryIsWgs84(geometry: FeatureGeometry, filePath: string, index: number) {
  const firstPair = findFirstCoordinatePair(geometry.coordinates);

  if (firstPair === null) {
    throw new Error(`Feature ${index} in ${filePath} does not contain readable coordinates.`);
  }

  const longitude = firstPair[0];
  const latitude = firstPair[1];

  // Values beyond these bounds usually mean a projected CRS (e.g. metres), not degrees.
  const longitudeOutOfRange = longitude > 180 || longitude < -180;
  const latitudeOutOfRange = latitude > 90 || latitude < -90;

  if (longitudeOutOfRange || latitudeOutOfRange) {
    throw new Error(
      `Feature ${index} in ${filePath} is not in WGS84 lon/lat coordinates. Re-export the dataset to GeoJSON with EPSG:4326, for example with ogr2ogr -t_srs EPSG:4326.`,
    );
  }
}