Public Access
1
0
Files
leads4less/db/scripts/postal-import-utils.ts
pguerrerox dc7686f507 chore: improve postal data import observability
Add progress logging and a status script for postal imports and neighbor builds, and ignore local raw and generated postal datasets.
2026-04-12 23:22:36 +00:00

131 lines
3.5 KiB
TypeScript

import { createReadStream } from 'node:fs';
import { chain } from 'stream-chain';
import { parser } from 'stream-json';
import { pick } from 'stream-json/filters/pick';
import { streamArray } from 'stream-json/streamers/stream-array';
/**
 * Geometry payload of a GeoJSON feature, limited to the two geometry kinds
 * that getFeatureGeometry accepts.
 */
export type FeatureGeometry = {
type: 'Polygon' | 'MultiPolygon';
// Deliberately `unknown`: the nested coordinate arrays are not described by the
// type system; findFirstCoordinatePair inspects them defensively at runtime.
coordinates: unknown;
};
/**
 * A single GeoJSON Feature, the element type yielded by streamFeatureCollection.
 *
 * Both `properties` and `geometry` may be missing or null, so consumers have to
 * check them before use (getStringProperty and getFeatureGeometry do this).
 */
export type GeoJsonFeature = {
type: 'Feature';
properties?: Record<string, unknown> | null;
geometry?: FeatureGeometry | null;
};
/**
 * Shape of a complete GeoJSON FeatureCollection document.
 *
 * Not referenced by the other definitions visible in this file:
 * streamFeatureCollection reads the `features` array one element at a time
 * rather than working with a value of this type.
 */
export type GeoJsonFeatureCollection = {
type: 'FeatureCollection';
features: GeoJsonFeature[];
};
/**
 * Describes one postal dataset.
 *
 * NOTE(review): no consumer of this type is visible in this file. The field notes
 * below are inferred from the field names and the helper signatures defined here;
 * confirm them against the scripts that use this config.
 */
export type PostalDatasetConfig = {
// Country of the dataset; the same union that normalizePostalCode accepts.
countryCode: 'US' | 'CA';
// Presumably a human-readable dataset name (e.g. for log output) — confirm.
label: string;
// Presumably the GeoJSON file path handed to streamFeatureCollection — confirm.
filePath: string;
// Presumably candidate property names holding the postal code, in lookup order
// for getStringProperty — confirm.
postalCodeKeys: string[];
// Presumably candidate property names holding a display name, in lookup order
// for getStringProperty — confirm.
displayNameKeys: string[];
};
/**
 * Searches an arbitrarily nested array structure, depth-first and in element
 * order, for the first array whose first two entries are both finite numbers.
 *
 * @param coordinates Value to inspect; anything that is not an array yields null.
 * @returns The first `[x, y]` pair found, or null when none exists.
 */
function findFirstCoordinatePair(coordinates: unknown): [number, number] | null {
  if (!Array.isArray(coordinates)) {
    return null;
  }

  const isFiniteNumber = (candidate: unknown): candidate is number =>
    typeof candidate === 'number' && Number.isFinite(candidate);

  // An array shorter than two entries reads `undefined` here and fails the guard,
  // so no separate length check is needed.
  const first: unknown = coordinates[0];
  const second: unknown = coordinates[1];
  if (isFiniteNumber(first) && isFiniteNumber(second)) {
    return [first, second];
  }

  // Not a pair itself: descend into each child and stop at the first hit.
  for (let i = 0; i < coordinates.length; i += 1) {
    const nested = findFirstCoordinatePair(coordinates[i]);
    if (nested !== null) {
      return nested;
    }
  }

  return null;
}
/**
 * Lazily yields the elements of the top-level `features` array of a GeoJSON file.
 *
 * The file is read as a UTF-8 stream and piped through the streaming JSON parser,
 * a `pick` filter on `features`, and an array streamer, so features are produced
 * one at a time.
 *
 * @param filePath Path of the GeoJSON file to read.
 * @throws Error when the pipeline finishes without producing a single feature.
 *   Note that this also covers a present-but-empty `features` array.
 */
export async function* streamFeatureCollection(filePath: string) {
  const featureStream = chain([
    createReadStream(filePath, { encoding: 'utf8' }),
    parser(),
    pick({ filter: 'features' }) as any,
    streamArray() as any,
  ]) as AsyncIterable<{ value: GeoJsonFeature }>;

  let yieldedCount = 0;
  for await (const { value } of featureStream) {
    yieldedCount += 1;
    yield value;
  }

  // Nothing came out of the pipeline: treat the input as unusable.
  if (yieldedCount === 0) {
    throw new Error(`Dataset at ${filePath} is not a valid GeoJSON FeatureCollection with a features array.`);
  }
}
/**
 * Reduces a raw postal code to the canonical short form used for its country.
 *
 * - `US`: all non-digit characters are removed and the first five digits are kept;
 *   fewer than five digits yields null.
 * - otherwise (`CA`): whitespace is removed, the text is upper-cased and the first
 *   three characters are kept; fewer than three characters yields null.
 *
 * @param countryCode Country whose rules apply.
 * @param rawPostalCode Postal code text as found in the dataset.
 * @returns The normalized code, or null when the input is too short.
 */
export function normalizePostalCode(countryCode: 'US' | 'CA', rawPostalCode: string) {
  const cleaned = rawPostalCode.trim();

  if (countryCode !== 'US') {
    const squeezed = cleaned.replace(/\s+/g, '').toUpperCase();
    if (squeezed.length < 3) {
      return null;
    }
    return squeezed.slice(0, 3);
  }

  const zipDigits = cleaned.replace(/\D/g, '').slice(0, 5);
  if (zipDigits.length !== 5) {
    return null;
  }
  return zipDigits;
}
/**
 * Returns the first usable value among the given property keys, as a string.
 *
 * Keys are tried in order. A string value counts when it is non-empty after
 * trimming (the trimmed text is returned); a finite number counts and is
 * converted with `String`. Every other value is skipped.
 *
 * @param properties Feature properties bag; null or undefined yields null.
 * @param keys Property names to try, in priority order.
 * @returns The first matching value as a string, or null when no key matches.
 */
export function getStringProperty(properties: Record<string, unknown> | null | undefined, keys: string[]) {
  if (!properties) {
    return null;
  }

  for (const propertyName of keys) {
    const candidate = properties[propertyName];

    switch (typeof candidate) {
      case 'string': {
        const text = candidate.trim();
        if (text.length > 0) {
          return text;
        }
        break;
      }
      case 'number':
        if (Number.isFinite(candidate)) {
          return String(candidate);
        }
        break;
      default:
        break;
    }
  }

  return null;
}
/**
 * Returns the feature's geometry once it is confirmed to be a Polygon or MultiPolygon.
 *
 * @param feature Feature to inspect.
 * @param filePath Source file path, used only in the error message.
 * @param index Position of the feature in the dataset, used only in the error message.
 * @returns The feature's geometry object (the same reference, not a copy).
 * @throws Error when the geometry is missing, null, or of any other type.
 */
export function getFeatureGeometry(feature: GeoJsonFeature, filePath: string, index: number) {
  const geometry = feature.geometry;

  if (geometry && (geometry.type === 'Polygon' || geometry.type === 'MultiPolygon')) {
    return geometry;
  }

  throw new Error(`Feature ${index} in ${filePath} is missing a Polygon or MultiPolygon geometry.`);
}
/**
 * Sanity-checks that a geometry looks like WGS84 longitude/latitude data.
 *
 * Only the first coordinate pair found by findFirstCoordinatePair is examined:
 * its first value must lie within [-180, 180] and its second within [-90, 90].
 *
 * @param geometry Geometry whose coordinates are inspected.
 * @param filePath Source file path, used only in error messages.
 * @param index Position of the feature in the dataset, used only in error messages.
 * @throws Error when no coordinate pair can be found, or when the first pair is
 *   outside the longitude/latitude ranges.
 */
export function assertGeometryIsWgs84(geometry: FeatureGeometry, filePath: string, index: number) {
  const firstPair = findFirstCoordinatePair(geometry.coordinates);
  if (firstPair === null) {
    throw new Error(`Feature ${index} in ${filePath} does not contain readable coordinates.`);
  }

  const longitude = firstPair[0];
  const latitude = firstPair[1];
  const longitudeOutOfRange = longitude > 180 || longitude < -180;
  const latitudeOutOfRange = latitude > 90 || latitude < -90;

  // Values beyond these bounds indicate a projected coordinate system rather than degrees.
  if (longitudeOutOfRange || latitudeOutOfRange) {
    throw new Error(
      `Feature ${index} in ${filePath} is not in WGS84 lon/lat coordinates. Re-export the dataset to GeoJSON with EPSG:4326, for example with ogr2ogr -t_srs EPSG:4326.`,
    );
  }
}