import { createReadStream } from 'node:fs';
import { chain } from 'stream-chain';
import { parser } from 'stream-json';
import { pick } from 'stream-json/filters/pick';
import { streamArray } from 'stream-json/streamers/stream-array';

/** Geometry of a feature. Only polygonal geometry types are modelled; coordinates are left untyped. */
export type FeatureGeometry = {
  type: 'Polygon' | 'MultiPolygon';
  coordinates: unknown;
};

/** A single GeoJSON feature as read from a dataset file. */
export type GeoJsonFeature = {
  type: 'Feature';
  properties?: Record<string, unknown> | null;
  geometry?: FeatureGeometry | null;
};

/** A GeoJSON feature collection held fully in memory. */
export type GeoJsonFeatureCollection = {
  type: 'FeatureCollection';
  features: GeoJsonFeature[];
};

/** Describes one postal dataset file and which property keys to read from its features. */
export type PostalDatasetConfig = {
  countryCode: 'US' | 'CA';
  label: string;
  filePath: string;
  postalCodeKeys: string[];
  displayNameKeys: string[];
};

/**
 * Depth-first search through arbitrarily nested arrays for the first array
 * whose first two elements are both finite numbers.
 *
 * @param coordinates - Any value; non-arrays yield `null`.
 * @returns The first `[x, y]` pair found, or `null` when none exists.
 */
function findFirstCoordinatePair(coordinates: unknown): [number, number] | null {
  if (!Array.isArray(coordinates)) {
    return null;
  }

  const first: unknown = coordinates[0];
  const second: unknown = coordinates[1];
  const startsWithFinitePair =
    coordinates.length >= 2 &&
    typeof first === 'number' &&
    Number.isFinite(first) &&
    typeof second === 'number' &&
    Number.isFinite(second);

  if (startsWithFinitePair) {
    return [first as number, second as number];
  }

  // Not a coordinate pair itself: descend into each element in order.
  for (const nested of coordinates) {
    const pair = findFirstCoordinatePair(nested);
    if (pair) {
      return pair;
    }
  }

  return null;
}

/**
 * Lazily yields the elements of the top-level `features` array of a JSON file,
 * one at a time, without loading the whole file into memory.
 *
 * @param filePath - Path of the file to read (decoded as UTF-8).
 * @throws Error when the stream ends without yielding any feature. Note that
 *   this is also the outcome when the `features` array exists but is empty.
 */
export async function* streamFeatureCollection(
  filePath: string,
): AsyncGenerator<GeoJsonFeature, void, unknown> {
  // The `as any` casts are kept from the original code; presumably they work
  // around the stream-json typings when used with stream-chain — TODO confirm.
  const pipeline = chain([
    createReadStream(filePath, { encoding: 'utf8' }),
    parser(),
    pick({ filter: 'features' }) as any,
    streamArray() as any,
  ]) as AsyncIterable<{ value: GeoJsonFeature }>;

  let foundFeatures = false;

  for await (const chunk of pipeline) {
    foundFeatures = true;
    yield chunk.value;
  }

  if (!foundFeatures) {
    throw new Error(`Dataset at ${filePath} is not a valid GeoJSON FeatureCollection with a features array.`);
  }
}

/**
 * Reduces a raw postal code to the short form used as a lookup key.
 *
 * - `US`: all non-digit characters are removed and the first five digits are
 *   kept; fewer than five digits yields `null`.
 * - `CA`: whitespace is removed, the result is upper-cased and the first three
 *   characters are kept; fewer than three characters yields `null`. The
 *   characters themselves are not validated.
 *
 * @returns The normalized code, or `null` when the input is too short.
 */
export function normalizePostalCode(countryCode: 'US' | 'CA', rawPostalCode: string): string | null {
  const trimmed = rawPostalCode.trim();

  if (countryCode === 'US') {
    const digits = trimmed.replace(/\D/g, '').slice(0, 5);
    return digits.length === 5 ? digits : null;
  }

  const compact = trimmed.replace(/\s+/g, '').toUpperCase();
  return compact.length >= 3 ? compact.slice(0, 3) : null;
}

/**
 * Returns the first usable value among `keys` in `properties`, as a string.
 *
 * A value is usable when it is a string that is non-empty after trimming
 * (returned trimmed) or a finite number (returned via `String`). Keys are
 * tried in the order given; anything else is skipped.
 *
 * @returns The first usable value, or `null` when `properties` is missing or
 *   no key holds a usable value.
 */
export function getStringProperty(
  properties: Record<string, unknown> | null | undefined,
  keys: readonly string[],
): string | null {
  if (!properties) {
    return null;
  }

  for (const key of keys) {
    const value = properties[key];

    if (typeof value === 'string' && value.trim().length > 0) {
      return value.trim();
    }

    if (typeof value === 'number' && Number.isFinite(value)) {
      return String(value);
    }
  }

  return null;
}

/**
 * Returns the feature's geometry after checking that it is polygonal.
 *
 * @param filePath - Used only in the error message.
 * @param index - Position of the feature in its dataset; used only in the error message.
 * @throws Error when the geometry is missing or is neither `Polygon` nor `MultiPolygon`.
 */
export function getFeatureGeometry(feature: GeoJsonFeature, filePath: string, index: number): FeatureGeometry {
  const geometry = feature.geometry;

  // The static type already restricts `type`, but the data comes from a file,
  // so the runtime check is kept.
  if (!geometry || (geometry.type !== 'Polygon' && geometry.type !== 'MultiPolygon')) {
    throw new Error(`Feature ${index} in ${filePath} is missing a Polygon or MultiPolygon geometry.`);
  }

  return geometry;
}

/**
 * Heuristic check that a geometry uses lon/lat degrees: only the first
 * coordinate pair found is inspected, and it must satisfy |lng| <= 180 and
 * |lat| <= 90.
 *
 * @param filePath - Used only in error messages.
 * @param index - Position of the feature in its dataset; used only in error messages.
 * @throws Error when no coordinate pair can be found, or when the first pair
 *   lies outside the lon/lat range.
 */
export function assertGeometryIsWgs84(geometry: FeatureGeometry, filePath: string, index: number): void {
  const pair = findFirstCoordinatePair(geometry.coordinates);

  if (!pair) {
    throw new Error(`Feature ${index} in ${filePath} does not contain readable coordinates.`);
  }

  const [lng, lat] = pair;

  if (Math.abs(lng) > 180 || Math.abs(lat) > 90) {
    throw new Error(
      `Feature ${index} in ${filePath} is not in WGS84 lon/lat coordinates. Re-export the dataset to GeoJSON with EPSG:4326, for example with ogr2ogr -t_srs EPSG:4326.`,
    );
  }
}