dc7686f507
Add progress logging and a status script for postal imports and neighbor builds, and ignore local raw and generated postal datasets.
144 lines
4.8 KiB
TypeScript
144 lines
4.8 KiB
TypeScript
import path from 'node:path';
|
|
import { fileURLToPath } from 'node:url';
|
|
import { getDbPool } from '../../server/src/db/pool.js';
|
|
import { createScriptLogger } from './postal-logging.js';
|
|
import { assertGeometryIsWgs84, getFeatureGeometry, getStringProperty, normalizePostalCode, streamFeatureCollection, type PostalDatasetConfig } from './postal-import-utils.js';
|
|
|
|
// ES modules have no __dirname, so derive this script's directory from its module URL.
const currentDir = path.dirname(fileURLToPath(import.meta.url));

// Default location of the postal datasets, one level above this script's directory.
const datasetsRoot = path.resolve(currentDir, '..', 'datasets', 'postal');
|
|
|
|
// US dataset: the file path can be overridden with POSTAL_US_DATASET_PATH;
// an unset or empty variable falls back to the bundled default file name.
const usZctaConfig: PostalDatasetConfig = {
  countryCode: 'US',
  label: 'US ZIP/ZCTA',
  filePath: process.env.POSTAL_US_DATASET_PATH || path.join(datasetsRoot, 'us_zcta.geojson'),
  // Candidate feature-property names handed to getStringProperty for the code and the display name.
  postalCodeKeys: ['postal_code', 'zip', 'zcta', 'GEOID20', 'ZCTA5CE20', 'ZCTA5CE10'],
  displayNameKeys: ['display_name', 'name', 'NAMELSAD20', 'GEOID20', 'postal_code'],
};

// Canadian dataset: the file path can be overridden with POSTAL_CA_DATASET_PATH.
const canadaFsaConfig: PostalDatasetConfig = {
  countryCode: 'CA',
  label: 'Canada FSA',
  filePath: process.env.POSTAL_CA_DATASET_PATH || path.join(datasetsRoot, 'ca_fsa.geojson'),
  postalCodeKeys: ['postal_code', 'fsa', 'CFSAUID', 'CFSAUID24'],
  displayNameKeys: ['display_name', 'name', 'CFSAUID', 'postal_code'],
};

// Datasets in the order they are listed: US first, then Canada.
const datasetConfigs: PostalDatasetConfig[] = [usZctaConfig, canadaFsaConfig];
|
|
|
|
// Progress-log interval in features, read from POSTAL_IMPORT_LOG_EVERY.
// Unparseable or zero values fall back to 500; anything below 1 is clamped to 1.
const requestedLogEvery = Number.parseInt(process.env.POSTAL_IMPORT_LOG_EVERY ?? '500', 10);
const logEvery = Math.max(1, requestedLogEvery || 500);
|
|
|
|
// Upsert for one postal area, keyed on (country_code, normalized_postal_code).
// $5 is the GeoJSON geometry text; the stored geometry, the centroid and the
// search radius (at least 1000, derived from the area in EPSG:3857) all come from it.
// Defined once here so the statement text is not rebuilt for every feature.
const upsertPostalAreaSql = `
  insert into public.postal_areas (
    country_code,
    postal_code,
    display_name,
    normalized_postal_code,
    geom,
    centroid,
    search_radius_m,
    metadata_json,
    created_at,
    updated_at
  )
  values (
    $1,
    $2,
    $3,
    $4,
    ST_Multi(ST_SetSRID(ST_GeomFromGeoJSON($5), 4326)),
    ST_Centroid(ST_SetSRID(ST_GeomFromGeoJSON($5), 4326))::geography,
    greatest(1000, round(sqrt(ST_Area(ST_Transform(ST_SetSRID(ST_GeomFromGeoJSON($5), 4326), 3857)) / pi()))::integer),
    $6::jsonb,
    now(),
    now()
  )
  on conflict (country_code, normalized_postal_code)
  do update set
    postal_code = excluded.postal_code,
    display_name = excluded.display_name,
    geom = excluded.geom,
    centroid = excluded.centroid,
    search_radius_m = excluded.search_radius_m,
    metadata_json = excluded.metadata_json,
    updated_at = now()
`;

/**
 * Streams one dataset file and upserts each usable feature into
 * public.postal_areas inside a single transaction.
 *
 * Features with no postal code property, or whose code does not normalize,
 * are counted as skipped. Any other failure rolls the transaction back and
 * is rethrown to the caller.
 *
 * @param config Dataset description: country code, label, file path and the
 *   property names to probe for the postal code and display name.
 * @throws Rethrows whatever error aborted the import, after attempting a rollback.
 */
async function importDataset(config: PostalDatasetConfig): Promise<void> {
  const pool = getDbPool();
  const client = await pool.connect();
  const logger = createScriptLogger(`postal-import:${config.countryCode.toLowerCase()}`);

  // Counters live outside the try block so the failure log can report progress.
  let index = 0;
  let insertedCount = 0;
  let skippedCount = 0;

  // Emits a progress line every `logEvery` processed features (skipped or inserted).
  const logProgress = (): void => {
    if (index % logEvery === 0) {
      logger.info(`Processed ${index} features, inserted ${insertedCount}, skipped ${skippedCount}`);
    }
  };

  try {
    logger.info(`Starting ${config.label} import from ${config.filePath}`);

    await client.query('begin');

    for await (const feature of streamFeatureCollection(config.filePath)) {
      index += 1;

      const rawPostalCode = getStringProperty(feature.properties, config.postalCodeKeys);
      if (!rawPostalCode) {
        skippedCount += 1;
        logProgress();
        continue;
      }

      const normalizedPostalCode = normalizePostalCode(config.countryCode, rawPostalCode);
      if (!normalizedPostalCode) {
        skippedCount += 1;
        logProgress();
        continue;
      }

      // Fall back to the normalized code when the feature carries no display name.
      const displayName = getStringProperty(feature.properties, config.displayNameKeys) || normalizedPostalCode;
      const geometry = getFeatureGeometry(feature, config.filePath, index);
      assertGeometryIsWgs84(geometry, config.filePath, index);

      await client.query(upsertPostalAreaSql, [
        config.countryCode,
        rawPostalCode.trim(),
        displayName,
        normalizedPostalCode,
        JSON.stringify(geometry),
        JSON.stringify(feature.properties ?? {}),
      ]);

      insertedCount += 1;
      logProgress();
    }

    await client.query('commit');
    logger.info(`Completed ${config.label} import. Inserted ${insertedCount} areas, skipped ${skippedCount}.`);
  } catch (error) {
    // A failing rollback (for example on a dropped connection) must not
    // replace the error that actually aborted the import, so it is logged
    // here and the original error is still rethrown below.
    try {
      await client.query('rollback');
    } catch (rollbackError) {
      const reason = rollbackError instanceof Error ? rollbackError.message : String(rollbackError);
      logger.error(`Rollback failed: ${reason}`);
    }

    logger.error(`Import failed after processing ${index} features.`);
    throw error;
  } finally {
    client.release();
  }
}
|
|
|
|
/**
 * Imports every configured dataset one after another, then closes the pool
 * returned by getDbPool().
 *
 * The pool is closed in `finally` so that a failed import does not leave it
 * open; the import error still propagates to the caller.
 *
 * @throws Rethrows the first dataset import failure.
 */
async function run(): Promise<void> {
  const logger = createScriptLogger('postal-import');

  try {
    // Datasets are imported sequentially, in the order they are listed.
    for (const config of datasetConfigs) {
      await importDataset(config);
    }

    logger.info('All postal datasets imported successfully.');
  } finally {
    // NOTE(review): assumes getDbPool() hands back the same pool instance that
    // importDataset used - confirm against server/src/db/pool.
    await getDbPool().end();
  }
}
|
|
|
|
// Script entry point: top-level await starts the import when this module is
// evaluated. There is no catch here, so a rejection from run() propagates out
// of module evaluation.
await run();
|