Files
app-test-nuxt/utils/module/fhirNameParser.ts
2025-11-26 07:49:54 +00:00

301 lines
8.2 KiB
TypeScript

// utils/fhirNameParser.ts
import type { FhirHumanName } from "~/types/fhir/humanName";
// Known Indonesian name prefixes and suffixes, loaded from JSON data files
import indonesianPrefixes from "~/data/indonesianPrefixes.json";
import indonesianSuffixes from "~/data/indonesianSuffixes.json";
// Read-only, string-typed views over the imported JSON lists.
// The parser walks each list in order and takes the first entry that matches.
const INDONESIAN_PREFIXES: readonly string[] = indonesianPrefixes;
const INDONESIAN_SUFFIXES: readonly string[] = indonesianSuffixes;
// Family name patterns
/** One entry of the family-name lookup table. */
interface FamilyNamePattern {
// Regular expression tested against a candidate name part.
pattern: RegExp;
// Label for the group of names the expression covers.
category: string;
}
/**
 * Lookup table used to decide whether a name part looks like a family name.
 *
 * Every pattern is anchored (`^...$`) and case-insensitive, so it only matches
 * a candidate that is exactly one of the listed names. Several names appear in
 * more than one category (for example "Nasution" or "Sari"); the lookup in this
 * file only asks whether any pattern matches, so the overlap does not change
 * its result.
 */
const FAMILY_NAME_PATTERNS: readonly FamilyNamePattern[] = [
// Batak family names
{
pattern:
/^(Siregar|Sitorus|Simanjuntak|Simatupang|Sinaga|Harahap|Hasibuan|Hutapea|Hutagalung|Hutabarat|Nasution|Lubis|Batubara|Rangkuti|Dalimunthe|Daulay|Matondang|Pulungan|Parinduri)$/i,
category: "batak"
},
// Mandailing family names
{
pattern:
/^(Nasution|Lubis|Batubara|Rangkuti|Dalimunthe|Daulay|Matondang|Pulungan|Parinduri)$/i,
category: "mandailing"
},
// Chinese-Indonesian (Tionghoa) family names
{
pattern:
/^(Tan|Lim|Lie|Ong|Tjoa|Oei|Kwee|The|Chong|Huang|Li|Wang|Zhang|Chen|Liu|Yang)$/i,
category: "chinese"
},
// Javanese-Chinese family names
{
pattern:
/^(Wijaya|Santoso|Gunawan|Susanto|Halim|Tjandra|Suharto|Prabowo|Wibowo|Setiawan)$/i,
category: "javanese-chinese"
},
// Common names used as a trailing name element
{
pattern:
/^(Putra|Putri|Wati|Ningrum|Sari|Dewi|Rahayu|Sukma|Ningsih|Rizki)$/i,
category: "common-suffix"
},
// Malay-Arabic family-name markers
{
pattern: /^(bin|binti)$/i,
category: "malay-arabic"
},
// Common family names
{
pattern: /^(Sukma|Rizki|Dewi|Sari|Ningsih|Rahayu|Wati|Ningrum)$/i,
category: "common"
},
// Arabic family names
{
pattern:
/^(Yusuf|Hassan|Ali|Fatimah|Aisyah|Zain|Husain|Khalid|Amin|Salim)$/i,
category: "arabic"
},
// Javanese family names
{
pattern:
/^(Suharto|Suharjo|Sukardi|Sukmawati|Kusuma|Prabowo|Wibowo|Setiawan)$/i,
category: "javanese"
},
// Female family names
{
pattern: /^(Sari|Wati|Ningrum|Dewi|Rahayu|Sukma|Ningsih)$/i,
category: "female-suffix"
},
// Sundanese family names
{
pattern:
/^(Sukma|Sari|Dewi|Rahayu|Hidayah|Ningsih|Rizki|Suhendi|Sukardi)$/i,
category: "sundanese"
},
// Balinese family names
{
pattern: /^(Putra|Putri|Wayan|Made|Nyoman|Ketut|Agung|Sukma|Dewi|Sari)$/i,
category: "balinese"
},
// Bugis family names
{
pattern: /^(Andi|Daeng|Puang|Sultan|Raja|Sitti|Baji|Makkunrai)$/i,
category: "bugis"
},
// Minang family names
{
pattern: /^(Sutan|Datuk|Raja|Pangeran|Haji|Sari|Datu|Raden)$/i,
category: "minang"
}
];
/**
 * Parses a free-text personal name into a FHIR HumanName structure.
 *
 * Processing order:
 * 1. Known prefixes (INDONESIAN_PREFIXES) are stripped from the start of the
 *    name; consecutive matches are joined with a space into a single `prefix`
 *    entry. If no known prefix matches, the first word of a multi-word name is
 *    moved into `prefix`, and a single-word name becomes the only `given` entry.
 * 2. Known suffixes (INDONESIAN_SUFFIXES), each optionally preceded by a comma,
 *    are stripped from the end and stored in `suffix` in the order they were
 *    written.
 * 3. The remaining words are split into `given` and `family` using
 *    `isLikelyFamilyName`; with three or more words and no recognised family
 *    name, the last two words are used as the family name.
 *
 * @param fullName Raw name text; may be null or undefined.
 * @returns `null` when the input is null, undefined, not a string, or only
 *   whitespace. Otherwise a HumanName with `use: "official"`, `text` set to
 *   the trimmed input, and only the non-empty components present.
 */
export function parseFhirHumanName(
  fullName: string | null | undefined
): FhirHumanName | null {
  if (!fullName || typeof fullName !== "string") {
    return null;
  }
  const text = fullName.trim();
  if (text.length === 0) {
    // Whitespace-only input carries no name, same as the empty string.
    return null;
  }

  const given: string[] = [];
  const prefix: string[] = [];

  const leading = extractLeadingPrefixes(text);
  let workingName = leading.rest;
  if (leading.found.length > 0) {
    prefix.push(leading.found.join(" "));
  } else {
    // NOTE(review): with no known prefix, the first word is stored as a prefix
    // even though it may be a given name. Kept as-is because the original code
    // documents this as intended; confirm against the callers' expectations.
    const firstSpaceIndex = workingName.indexOf(" ");
    if (firstSpaceIndex !== -1) {
      prefix.push(workingName.substring(0, firstSpaceIndex));
      workingName = workingName.substring(firstSpaceIndex + 1).trim();
    } else {
      // Single-word name: it is the given name and nothing is left to parse.
      given.push(workingName);
      workingName = "";
    }
  }

  const trailing = extractTrailingSuffixes(workingName);
  const nameParts = trailing.rest
    .split(/\s+/)
    .filter((part) => part.length > 0);

  // Decide how many trailing words form the family name (0, 1 or 2).
  let familyWordCount = 0;
  const lastWord = nameParts[nameParts.length - 1] ?? "";
  if (nameParts.length === 2) {
    familyWordCount = isLikelyFamilyName(lastWord) ? 1 : 0;
  } else if (nameParts.length > 2) {
    const lastTwoWords = nameParts.slice(-2).join(" ");
    // A recognised two-word family name wins; otherwise a recognised last
    // word; otherwise fall back to the last two words.
    familyWordCount =
      !isLikelyFamilyName(lastTwoWords) && isLikelyFamilyName(lastWord) ? 1 : 2;
  }
  const familyStart = nameParts.length - familyWordCount;
  given.push(...nameParts.slice(0, familyStart));
  const family =
    familyWordCount > 0 ? nameParts.slice(familyStart).join(" ") : undefined;

  // Build the result with only the populated components, on every path.
  const fhirName: FhirHumanName = { use: "official", text };
  if (family) fhirName.family = family;
  if (given.length > 0) fhirName.given = given;
  if (prefix.length > 0) fhirName.prefix = prefix;
  if (trailing.found.length > 0) fhirName.suffix = trailing.found;
  return fhirName;
}

/** Strips every known prefix from the start of `name`, in the order found. */
function extractLeadingPrefixes(name: string): {
  found: string[];
  rest: string;
} {
  // Compile once per call instead of once per loop pass.
  const matchers = INDONESIAN_PREFIXES.map((value) => ({
    value,
    regex: new RegExp(`^${escapeRegExp(value)}\\s+`, "i")
  }));
  const found: string[] = [];
  let rest = name;
  let hit = matchers.find(({ regex }) => regex.test(rest));
  while (hit) {
    found.push(hit.value);
    rest = rest.replace(hit.regex, "");
    hit = matchers.find(({ regex }) => regex.test(rest));
  }
  return { found, rest };
}

/** Strips every known suffix from the end of `name`, keeping written order. */
function extractTrailingSuffixes(name: string): {
  found: string[];
  rest: string;
} {
  const matchers = INDONESIAN_SUFFIXES.map((value) => ({
    value,
    regex: new RegExp(`(,?\\s+${escapeRegExp(value)})$`, "i")
  }));
  const found: string[] = [];
  let rest = name;
  let hit = matchers.find(({ regex }) => regex.test(rest));
  while (hit) {
    // Suffixes are discovered right to left, so prepend to keep source order.
    if (!found.includes(hit.value)) {
      found.unshift(hit.value);
    }
    rest = rest.replace(hit.regex, "").trim();
    hit = matchers.find(({ regex }) => regex.test(rest));
  }
  // Drop a comma left dangling at the end of the name.
  rest = rest.replace(/,\s*$/, "").trim();
  return { found, rest };
}
/**
 * Reports whether `namePart` matches at least one entry of
 * FAMILY_NAME_PATTERNS.
 */
function isLikelyFamilyName(namePart: string): boolean {
  for (const entry of FAMILY_NAME_PATTERNS) {
    if (entry.pattern.test(namePart)) {
      return true;
    }
  }
  return false;
}
/**
 * Backslash-escapes every regular-expression metacharacter in `string` so the
 * result can be embedded in a RegExp source and matched literally.
 */
function escapeRegExp(string: string): string {
  const metaCharacters = /[.*+?^${}()|[\]\\]/g;
  return string.replace(
    metaCharacters,
    (metaCharacter) => "\\" + metaCharacter
  );
}
/**
 * Renders a FHIR HumanName as a single display string.
 *
 * Components appear in the order prefix, given, family, suffix and are
 * separated by one space. Prefix and given entries are joined with spaces;
 * suffix entries are joined with ", ". Missing or empty components are skipped.
 *
 * @param fhirName The name to render; may be null or undefined.
 * @returns The formatted name, or "" when `fhirName` is null or undefined.
 */
export function formatFhirName(
  fhirName: FhirHumanName | null | undefined
): string {
  if (!fhirName) {
    return "";
  }

  const segments: string[] = [];
  const appendJoined = (
    items: readonly string[] | null | undefined,
    separator: string
  ): void => {
    if (items && items.length > 0) {
      segments.push(items.join(separator));
    }
  };

  appendJoined(fhirName.prefix, " ");
  appendJoined(fhirName.given, " ");
  if (fhirName.family) {
    segments.push(fhirName.family);
  }
  appendJoined(fhirName.suffix, ", ");

  return segments.join(" ");
}
/**
 * Checks a FHIR HumanName and collects human-readable problems.
 *
 * Rules applied:
 * - the object itself must be present (otherwise that is the only error);
 * - `text` must be non-blank;
 * - at least one `given` entry or a `family` value must be present;
 * - `use`, when set, must be one of the accepted values.
 *
 * @param fhirName The name to validate; may be null or undefined.
 * @returns The list of error messages; empty when the name passes every check.
 */
export function validateFhirHumanName(
  fhirName: FhirHumanName | null | undefined
): string[] {
  if (!fhirName) {
    return ["FHIR HumanName object is required"];
  }

  const validUseValues: FhirHumanName["use"][] = [
    "usual",
    "official",
    "temp",
    "nickname",
    "anonymous",
    "old",
    "maiden"
  ];
  const problems: string[] = [];

  const hasText = fhirName.text && fhirName.text.trim() !== "";
  if (!hasText) {
    problems.push("Text representation of name is required");
  }

  const hasGiven = fhirName.given && fhirName.given.length !== 0;
  if (!hasGiven && !fhirName.family) {
    problems.push("At least one given name or family name is required");
  }

  if (fhirName.use) {
    const useIsKnown = validUseValues.includes(fhirName.use);
    if (!useIsKnown) {
      problems.push(
        `Invalid use value. Must be one of: ${validUseValues.join(", ")}`
      );
    }
  }

  return problems;
}