dc7686f507
Add progress logging and a status script for postal imports and neighbor builds, and ignore local raw and generated postal datasets.
131 lines
3.5 KiB
TypeScript
131 lines
3.5 KiB
TypeScript
import { createReadStream } from 'node:fs';
|
|
import { chain } from 'stream-chain';
|
|
import { parser } from 'stream-json';
|
|
import { pick } from 'stream-json/filters/pick';
|
|
import { streamArray } from 'stream-json/streamers/stream-array';
|
|
|
|
/**
 * Geometry of a feature, restricted to the two polygonal GeoJSON geometry types.
 */
export type FeatureGeometry = {
  /** Geometry discriminator; only `'Polygon'` and `'MultiPolygon'` are modeled. */
  type: 'Polygon' | 'MultiPolygon';
  /** Raw coordinate payload; typed as `unknown`, so consumers must narrow it before reading values. */
  coordinates: unknown;
};
|
|
|
|
/**
 * A single GeoJSON feature as read from a dataset file.
 *
 * Both `properties` and `geometry` may be absent or `null`, so readers must
 * check them before use.
 */
export type GeoJsonFeature = {
  /** Literal GeoJSON object tag. */
  type: 'Feature';
  /** Free-form attribute bag; values are `unknown` and must be narrowed by the reader. */
  properties?: Record<string, unknown> | null;
  /** Polygonal geometry of the feature, when present. */
  geometry?: FeatureGeometry | null;
};
|
|
|
|
/**
 * Top-level GeoJSON document shape: a tagged wrapper around an array of features.
 */
export type GeoJsonFeatureCollection = {
  /** Literal GeoJSON object tag. */
  type: 'FeatureCollection';
  /** The features contained in the collection. */
  features: GeoJsonFeature[];
};
|
|
|
|
/**
 * Describes one postal dataset.
 *
 * NOTE(review): the field meanings below are inferred from the field names;
 * the consumers of this type are not visible here — confirm against callers.
 */
export type PostalDatasetConfig = {
  /** Country code for the dataset; one of `'US'` or `'CA'`. */
  countryCode: 'US' | 'CA';
  /** Label for the dataset (presumably human-readable, e.g. for logs). */
  label: string;
  /** Path to the dataset file (presumably the GeoJSON file to read). */
  filePath: string;
  /** Property names, presumably tried in order to find a feature's postal code. */
  postalCodeKeys: string[];
  /** Property names, presumably tried in order to find a feature's display name. */
  displayNameKeys: string[];
};
|
|
|
|
/**
 * Depth-first search for the first `[x, y]` pair inside an arbitrarily nested
 * coordinate structure.
 *
 * An array counts as a pair when its first two elements are both finite
 * numbers; any further elements are ignored. Otherwise each element is
 * searched recursively, in order.
 *
 * @param coordinates - Value to search; anything that is not an array yields `null`.
 * @returns The first two numbers of the first matching array, or `null` if none is found.
 */
function findFirstCoordinatePair(coordinates: unknown): [number, number] | null {
  if (!Array.isArray(coordinates)) {
    return null;
  }

  // Arrays with fewer than two elements destructure to `undefined` and fail
  // the `typeof` checks, matching an explicit length guard.
  const [first, second]: unknown[] = coordinates;
  const isPair =
    typeof first === 'number' &&
    Number.isFinite(first) &&
    typeof second === 'number' &&
    Number.isFinite(second);

  if (isPair) {
    return [first, second];
  }

  for (const nested of coordinates) {
    const found = findFirstCoordinatePair(nested);
    if (found !== null) {
      return found;
    }
  }

  return null;
}
|
|
|
|
/**
 * Streams the entries of the top-level `features` array of a JSON file one at
 * a time, without loading the whole file into memory.
 *
 * The file is read as UTF-8, tokenized by `parser()`, filtered down to the
 * `features` key by `pick`, and split into individual array items by
 * `streamArray`.
 *
 * @param filePath - Path of the file to read.
 * @returns An async generator yielding each item's `value`, typed as `GeoJsonFeature`
 *   (the items are not validated at runtime).
 * @throws Error if the pipeline completes without yielding any item. This presumably
 *   also covers a `features` array that is present but empty — confirm against
 *   stream-json's `streamArray` behavior.
 */
export async function* streamFeatureCollection(
  filePath: string,
): AsyncGenerator<GeoJsonFeature, void, unknown> {
  const pipeline = chain([
    createReadStream(filePath, { encoding: 'utf8' }),
    parser(),
    // NOTE(review): the `as any` casts presumably paper over a typing mismatch
    // between stream-json and stream-chain — confirm before tightening.
    pick({ filter: 'features' }) as any,
    streamArray() as any,
  ]) as AsyncIterable<{ value: GeoJsonFeature }>;

  let foundFeatures = false;

  for await (const chunk of pipeline) {
    foundFeatures = true;
    yield chunk.value;
  }

  if (!foundFeatures) {
    throw new Error(`Dataset at ${filePath} is not a valid GeoJSON FeatureCollection with a features array.`);
  }
}
|
|
|
|
/**
 * Normalizes a raw postal code to the canonical short form used for a country.
 *
 * - `'US'`: strips every non-digit character and keeps the first five digits
 *   (so `12345-6789` becomes `12345`); returns `null` when fewer than five
 *   digits remain.
 * - `'CA'`: removes whitespace, upper-cases, and keeps the first three
 *   characters (the forward sortation area); returns `null` unless those three
 *   characters have the letter-digit-letter shape.
 *
 * @param countryCode - Which country's rules to apply.
 * @param rawPostalCode - Postal code text as found in the dataset.
 * @returns The normalized code, or `null` when the input cannot be normalized.
 */
export function normalizePostalCode(countryCode: 'US' | 'CA', rawPostalCode: string): string | null {
  const trimmed = rawPostalCode.trim();

  if (countryCode === 'US') {
    const digits = trimmed.replace(/\D/g, '').slice(0, 5);
    return digits.length === 5 ? digits : null;
  }

  const fsa = trimmed.replace(/\s+/g, '').toUpperCase().slice(0, 3);

  // Validate the shape so arbitrary three-character prefixes (e.g. "123") are
  // rejected, mirroring the validation the US branch already performs.
  return /^[A-Z]\d[A-Z]$/.test(fsa) ? fsa : null;
}
|
|
|
|
/**
 * Returns the first usable value among `keys` in a property bag, as a string.
 *
 * Keys are tried in the order given. A value is usable when it is a string
 * that is non-empty after trimming (returned trimmed) or a finite number
 * (returned via `String`). Any other value is skipped.
 *
 * @param properties - Property bag to read; `null` or `undefined` yields `null`.
 * @param keys - Candidate property names, in priority order.
 * @returns The first usable value as a string, or `null` if no key has one.
 */
export function getStringProperty(
  properties: Record<string, unknown> | null | undefined,
  keys: readonly string[],
): string | null {
  if (!properties) {
    return null;
  }

  for (const key of keys) {
    const value = properties[key];

    if (typeof value === 'string') {
      const text = value.trim();
      if (text.length > 0) {
        return text;
      }
      // Blank strings fall through to the next key.
      continue;
    }

    if (typeof value === 'number' && Number.isFinite(value)) {
      return String(value);
    }
  }

  return null;
}
|
|
|
|
/**
 * Returns a feature's geometry after checking that it is polygonal.
 *
 * @param feature - Feature whose geometry is required.
 * @param filePath - Dataset path, used only in the error message.
 * @param index - Position of the feature, used only in the error message.
 * @returns The feature's `Polygon` or `MultiPolygon` geometry.
 * @throws Error if the geometry is missing, `null`, or of any other type.
 */
export function getFeatureGeometry(feature: GeoJsonFeature, filePath: string, index: number): FeatureGeometry {
  const geometry = feature.geometry;

  // The runtime type check matters even though the static type only allows
  // polygonal geometries: features come from parsed files and are not validated.
  const isPolygonal =
    Boolean(geometry) && (geometry!.type === 'Polygon' || geometry!.type === 'MultiPolygon');

  if (!geometry || !isPolygonal) {
    throw new Error(`Feature ${index} in ${filePath} is missing a Polygon or MultiPolygon geometry.`);
  }

  return geometry;
}
|
|
|
|
/**
 * Sanity-checks that a geometry's coordinates look like longitude/latitude degrees.
 *
 * Only the first coordinate pair found in the geometry is inspected; it is read
 * as `[lng, lat]` and must satisfy `|lng| <= 180` and `|lat| <= 90`. This is a
 * heuristic: it rejects values outside the degree range but cannot prove that
 * the data is actually in WGS84.
 *
 * @param geometry - Geometry whose coordinates are checked.
 * @param filePath - Dataset path, used only in error messages.
 * @param index - Position of the feature, used only in error messages.
 * @throws Error if no coordinate pair can be found, or if the first pair is out of range.
 */
export function assertGeometryIsWgs84(geometry: FeatureGeometry, filePath: string, index: number): void {
  const firstPair = findFirstCoordinatePair(geometry.coordinates);

  if (firstPair === null) {
    throw new Error(`Feature ${index} in ${filePath} does not contain readable coordinates.`);
  }

  const [lng, lat] = firstPair;
  const outOfRange = Math.abs(lng) > 180 || Math.abs(lat) > 90;

  if (outOfRange) {
    throw new Error(
      `Feature ${index} in ${filePath} is not in WGS84 lon/lat coordinates. Re-export the dataset to GeoJSON with EPSG:4326, for example with ogr2ogr -t_srs EPSG:4326.`,
    );
  }
}
|