Public Access
1
0
Files
leads4less/db/scripts/import-postal-areas.ts
pguerrerox dc7686f507 chore: improve postal data import observability
Add progress logging and a status script for postal imports and neighbor builds, and ignore local raw and generated postal datasets.
2026-04-12 23:22:36 +00:00

144 lines
4.8 KiB
TypeScript

import path from 'node:path';
import { fileURLToPath } from 'node:url';
import { getDbPool } from '../../server/src/db/pool.js';
import { createScriptLogger } from './postal-logging.js';
import { assertGeometryIsWgs84, getFeatureGeometry, getStringProperty, normalizePostalCode, streamFeatureCollection, type PostalDatasetConfig } from './postal-import-utils.js';
// Directory that holds this script; the default dataset location is resolved relative to it.
const currentDir = path.dirname(fileURLToPath(import.meta.url));

// Default folder for the postal GeoJSON files (../datasets/postal relative to this script).
const datasetsRoot = path.resolve(currentDir, '../datasets/postal');

/**
 * Chooses the dataset file for one country.
 *
 * @param envOverride - Value of the country's path environment variable, if any.
 * @param defaultFileName - File name looked up under `datasetsRoot` when the override is unset or empty.
 * @returns The override when it is a non-empty string, otherwise the default path.
 */
function datasetPathFor(envOverride: string | undefined, defaultFileName: string): string {
  if (envOverride) {
    return envOverride;
  }
  return path.join(datasetsRoot, defaultFileName);
}

// United States dataset; path overridable through POSTAL_US_DATASET_PATH.
const usDataset: PostalDatasetConfig = {
  countryCode: 'US',
  label: 'US ZIP/ZCTA',
  filePath: datasetPathFor(process.env.POSTAL_US_DATASET_PATH, 'us_zcta.geojson'),
  // Candidate feature property names passed to getStringProperty for the postal code.
  postalCodeKeys: ['postal_code', 'zip', 'zcta', 'GEOID20', 'ZCTA5CE20', 'ZCTA5CE10'],
  // Candidate feature property names passed to getStringProperty for the display name.
  displayNameKeys: ['display_name', 'name', 'NAMELSAD20', 'GEOID20', 'postal_code'],
};

// Canadian dataset; path overridable through POSTAL_CA_DATASET_PATH.
const caDataset: PostalDatasetConfig = {
  countryCode: 'CA',
  label: 'Canada FSA',
  filePath: datasetPathFor(process.env.POSTAL_CA_DATASET_PATH, 'ca_fsa.geojson'),
  postalCodeKeys: ['postal_code', 'fsa', 'CFSAUID', 'CFSAUID24'],
  displayNameKeys: ['display_name', 'name', 'CFSAUID', 'postal_code'],
};

// Datasets are imported in this order.
const datasetConfigs: PostalDatasetConfig[] = [usDataset, caDataset];

// Progress is logged every `logEvery` processed features. POSTAL_IMPORT_LOG_EVERY overrides
// the default of 500; an unparseable or zero value falls back to 500, and anything below 1
// is clamped to 1.
const defaultLogInterval = 500;
const requestedLogInterval = Number.parseInt(
  process.env.POSTAL_IMPORT_LOG_EVERY ?? String(defaultLogInterval),
  10,
);
const logEvery = Math.max(1, requestedLogInterval || defaultLogInterval);
// Upsert for a single postal area. $5 is the feature geometry as GeoJSON text and $6 the raw
// feature properties as JSON. search_radius_m is the radius of a circle with the same area as
// the geometry projected to EPSG:3857, with a floor of 1000.
const upsertPostalAreaSql = `
  insert into public.postal_areas (
    country_code,
    postal_code,
    display_name,
    normalized_postal_code,
    geom,
    centroid,
    search_radius_m,
    metadata_json,
    created_at,
    updated_at
  )
  values (
    $1,
    $2,
    $3,
    $4,
    ST_Multi(ST_SetSRID(ST_GeomFromGeoJSON($5), 4326)),
    ST_Centroid(ST_SetSRID(ST_GeomFromGeoJSON($5), 4326))::geography,
    greatest(1000, round(sqrt(ST_Area(ST_Transform(ST_SetSRID(ST_GeomFromGeoJSON($5), 4326), 3857)) / pi()))::integer),
    $6::jsonb,
    now(),
    now()
  )
  on conflict (country_code, normalized_postal_code)
  do update set
    postal_code = excluded.postal_code,
    display_name = excluded.display_name,
    geom = excluded.geom,
    centroid = excluded.centroid,
    search_radius_m = excluded.search_radius_m,
    metadata_json = excluded.metadata_json,
    updated_at = now()
`;

/**
 * Imports one postal dataset into `public.postal_areas` inside a single transaction.
 *
 * Features are streamed from `config.filePath`. A feature is skipped when no postal code
 * property is found or when `normalizePostalCode` returns a falsy value; every other feature
 * is upserted on `(country_code, normalized_postal_code)`. Progress is logged each time the
 * processed-feature count is a multiple of `logEvery`.
 *
 * @param config - Dataset to import (country code, label, file path, property keys).
 * @throws Rethrows any error raised while reading, validating, or writing a feature, after
 *   attempting to roll the transaction back.
 */
async function importDataset(config: PostalDatasetConfig): Promise<void> {
  // Build the logger before checking out a client so a failure here cannot leak a connection.
  const logger = createScriptLogger(`postal-import:${config.countryCode.toLowerCase()}`);
  const pool = getDbPool();
  const client = await pool.connect();

  let index = 0; // 1-based count of features read so far, including skipped ones
  let insertedCount = 0; // upserts executed (new rows and updated rows alike)
  let skippedCount = 0;

  // Emits the periodic progress line; shared by the skip paths and the insert path.
  const logProgress = (): void => {
    if (index % logEvery === 0) {
      logger.info(`Processed ${index} features, inserted ${insertedCount}, skipped ${skippedCount}`);
    }
  };

  try {
    logger.info(`Starting ${config.label} import from ${config.filePath}`);
    await client.query('begin');

    for await (const feature of streamFeatureCollection(config.filePath)) {
      index += 1;

      const rawPostalCode = getStringProperty(feature.properties, config.postalCodeKeys);
      if (!rawPostalCode) {
        skippedCount += 1;
        logProgress();
        continue;
      }

      const normalizedPostalCode = normalizePostalCode(config.countryCode, rawPostalCode);
      if (!normalizedPostalCode) {
        skippedCount += 1;
        logProgress();
        continue;
      }

      // Fall back to the normalized code when the feature carries no usable display name.
      const displayName = getStringProperty(feature.properties, config.displayNameKeys) || normalizedPostalCode;
      const geometry = getFeatureGeometry(feature, config.filePath, index);
      assertGeometryIsWgs84(geometry, config.filePath, index);

      await client.query(upsertPostalAreaSql, [
        config.countryCode,
        rawPostalCode.trim(),
        displayName,
        normalizedPostalCode,
        JSON.stringify(geometry),
        JSON.stringify(feature.properties ?? {}),
      ]);

      insertedCount += 1;
      logProgress();
    }

    await client.query('commit');
    logger.info(`Completed ${config.label} import. Inserted ${insertedCount} areas, skipped ${skippedCount}.`);
  } catch (error) {
    // A failing rollback (e.g. on a broken connection) must not replace the error that
    // actually aborted the import, so it is logged here and the original is rethrown below.
    try {
      await client.query('rollback');
    } catch (rollbackError) {
      const detail = rollbackError instanceof Error ? rollbackError.message : String(rollbackError);
      logger.error(`Rollback failed after import error: ${detail}`);
    }
    logger.error(`Import failed after processing ${index} features.`);
    throw error;
  } finally {
    client.release();
  }
}
/**
 * Imports every configured postal dataset in order, then closes the shared database pool.
 *
 * The pool is closed in `finally` so it is also shut down when an import throws; the import
 * error still propagates to the caller.
 */
async function run(): Promise<void> {
  const logger = createScriptLogger('postal-import');
  try {
    // Datasets are imported one at a time; each call runs its own transaction.
    for (const config of datasetConfigs) {
      await importDataset(config);
    }
    logger.info('All postal datasets imported successfully.');
  } finally {
    await getDbPool().end();
  }
}

// Script entry point: a rejection here surfaces as an unhandled top-level error.
await run();