Versions Compared

Key

  • This line was added.
  • This line was removed.
  • Formatting was changed.

...

  1. to filter out proper names:
    1. ignore all words that start with an UPPERCASE letter
  2. to filter out abbreviations:
    1. ignore all words that are fully in UPPERCASE
    2. ignore all words that are shorter than 3 chars
    3. ignore all words that start with a digit
    4. ignore all qnames, e.g., rdfs:subClassOf
  3. to filter out all IDMP-specific words - see the list below
  4. all remaining words are checked against the list from https://pypi.org/project/pyspellchecker/
    1. the list contains mostly nouns in the singular form, so plural forms are handled by simply removing the last letter.

...

Expand
title: Click here to expand list of IDMP-specific words...


Code Block
language: yml
linenumbers: true
[
    "Avogadro",
    "Harpagophytum",
    "Planck",
    "ab02sh",
    "acceptability",
    "acetonide",
    "achiral",
    "adjuvant",
    "administrable",
    "afucosylated",
    "aldehyde",
    "aliskiren",
    "allergenic",
    "allergenicity",
    "aluminometasilicate",
    "amination",
    "aminoethoxy",
    "amlodipine",
    "anion",
    "annotation",
    "anthropoid",
    "approx",
    "aptamer",
    "benzathine",
    "benzylpenicillin",
    "besilate",
    "besylate",
    "bibliographic",
    "bioactive",
    "bioinformatics",
    "bioontology",
    "biopharma",
    "biopolymer",
    "biosimilar",
    "biphenyls",
    "bittner",
    "blackwell",
    "boolean",
    "bzip",
    "c20h25cln2o5",
    "c21h31cln2o9s",
    "c2cl",
    "cahn",
    "cation",
    "cc2",
    "cc2ccc",
    "cc3ccccc3",
    "cdc",
    "cdx",
    "charset",
    "checksum",
    "chelate",
    "chelated",
    "chiral",
    "chirality",
    "chiroptic",
    "chlorophenyl",
    "clathrate",
    "colourant",
    "comorbidity",
    "conceptualization",
    "conditionally",
    "conformance",
    "conformant",
    "conformer",
    "contemporarily",
    "contraindication",
    "cooccurring",
    "countable",
    "cryoprecipitate",
    "crystallizing",
    "csv",
    "culturing",
    "datamodel",
    "dataset",
    "datatype",
    "definitional",
    "definitionally",
    "degradant",
    "delimiter",
    "deoxyribose",
    "deprecated",
    "desacyl",
    "descriptor",
    "desmopressin",
    "dextrorotatory",
    "diacetate",
    "dialkyl",
    "diastereomer",
    "dibasic",
    "dicarboxylate",
    "diester",
    "dihydro",
    "dihydropyridine",
    "dihydroxy",
    "dimensionless",
    "dimethicone",
    "dipole",
    "discontinuous",
    "distributional",
    "disulfide",
    "disulphide",
    "dodecyl",
    "doseage",
    "drugbank",
    "dynavax",
    "electrophoresis",
    "eluted",
    "emulsifier",
    "enantiomer",
    "enol",
    "enumeration",
    "epimerase",
    "epimeric",
    "equimolar",
    "equivalence",
    "erythromycin",
    "esterification",
    "etiology",
    "eudra",
    "eudravigilance",
    "excipient",
    "extractive",
    "gentamicin",
    "germplasm",
    "github",
    "glaxosmithkline",
    "gln",
    "glutaraldehyde",
    "glycan",
    "glyceraldehyde",
    "glyceryl",
    "glycolate",
    "glycoprotein",
    "glycosylation",
    "glycyl",
    "gmbh",
    "goodchild",
    "guanidinyl",
    "gzip",
    "harmonization",
    "hartshorn",
    "hepatorenal",
    "heteroarenol",
    "heterogeneity",
    "homogenic",
    "homologous",
    "hornworts",
    "html",
    "http",
    "https",
    "hutton",
    "hydrogenation",
    "hydrolysis",
    "hydroxy",
    "hydroxyl",
    "hyperfine",
    "inactivation",
    "inchikey",
    "industrially",
    "infective",
    "informatics",
    "instrumentality",
    "interconversion",
    "interconvert",
    "interventional",
    "intraspecies",
    "investigational",
    "isoelectric",
    "isolatable",
    "isomer",
    "isomeric",
    "isomerization",
    "isotopically",
    "jsp",
    "kDa",
    "kanamycin",
    "ketone",
    "kmno4",
    "laevorotatory",
    "lexical",
    "ligand",
    "linker",
    "linkoping",
    "livertox",
    "liverworts",
    "logKow",
    "macrogol",
    "macromolecular",
    "maintainability",
    "martindale",
    "mcg",
    "medDRA",
    "mediatype",
    "medicament",
    "mednet",
    "mereological",
    "mesilate",
    "mesylate",
    "metamodel",
    "metoprolol",
    "microbiological",
    "microcrystalline",
    "microgram",
    "micronization",
    "micronized",
    "middlesex",
    "mixture",
    "modularization",
    "moieties",
    "moiety",
    "molfile",
    "monoacetate",
    "monobenzenesulphonate",
    "monoclonal",
    "monodisperse",
    "monoesters",
    "monohydrate",
    "monomethanesulfonate",
    "monophosphoryl",
    "morphilino",
    "mycin",
    "ncbi",
    "nci",
    "ncicb",
    "ncit",
    "nitroprusside",
    "nlm",
    "noncovalent",
    "nonprescription",
    "nonproprietary",
    "norvasc",
    "novartis",
    "nucleon",
    "nucleoside",
    "nuclide",
    "nullflavor",
    "nullflavored",
    "obolibrary",
    "octahedral",
    "oligo",
    "oligonucleotides",
    "oligosaccharide",
    "onboarded",
    "ontologies",
    "ontology",
    "optionally",
    "ordinating",
    "oxoacid",
    "paperboard",
    "pectoris",
    "pentahydrate",
    "permeation",
    "pfizer",
    "pharmacodynamic",
    "pharmacologic",
    "pharmacologically",
    "pharmacopeia",
    "pharmacopoeia",
    "pharmacopoeias",
    "pharmacovigilance",
    "phenotype",
    "phenotypic",
    "phosphodiester",
    "phosphorothioate",
    "phosphorylation",
    "phpid",
    "physiologic",
    "pistoia",
    "pka",
    "planar",
    "plantae",
    "plasmid",
    "plc",
    "pmc",
    "polyacrylamide",
    "polyclonal",
    "polydisperse",
    "polymerase",
    "polymerization",
    "polysaccharide",
    "postcoordinated",
    "potentiates",
    "preclinical",
    "predefined",
    "prefilled",
    "prefixed",
    "procumbens",
    "prodrug",
    "prolongation",
    "propranolol",
    "prospectively",
    "proteomics",
    "pubchem",
    "pubmed",
    "pyridinedicarboxylate",
    "qtu",
    "quantification",
    "quantifies",
    "quantitatively",
    "r",
    "rDNA",
    "racemate",
    "racemic",
    "radioanalytical",
    "radiolabelled",
    "radionuclide",
    "rdf",
    "reactivity",
    "reasoner",
    "referential",
    "reification",
    "reified",
    "resolvable",
    "reusability",
    "rfcs",
    "ribonucleic",
    "ribose",
    "ritonavir",
    "rke",
    "roundwood",
    "rxnorm",
    "semifinished",
    "separable",
    "serine",
    "simethicone",
    "solubility",
    "solvate",
    "sp2",
    "sp3",
    "spor",
    "stearate",
    "stereocenter",
    "stereochemical",
    "stereochemistries",
    "stereochemistry",
    "stereoisomer",
    "stoichiometric",
    "stoichiometrical",
    "stoichiometry",
    "subclass",
    "subclause",
    "submitter",
    "substituent",
    "subtype",
    "subunit",
    "succinate",
    "sulphation",
    "superimposable",
    "superproperty",
    "supertype",
    "svhcs",
    "synthase",
    "synthetase",
    "taiwan",
    "tartrate",
    "tautomeric",
    "temporally",
    "terlipressin",
    "terminologically",
    "terminologies",
    "tetrahedral",
    "tetrahedron",
    "tetrahydrate",
    "thickener",
    "throughput",
    "thymine",
    "titration",
    "toxicant",
    "transduced",
    "transferase",
    "translatable",
    "translational",
    "triamcinolone",
    "trihydrate",
    "trinomial",
    "typographic",
    "uncompressed",
    "unencoded",
    "unformatted",
    "unicode",
    "uniprot",
    "unitage",
    "unitary",
    "unstandardized",
    "varices",
    "vasoactive",
    "vasopressin",
    "vasospastic",
    "vocabularies",
    "waals",
    "webservice",
    "wiswesser",
    "www",
    "xenobiotics",
    "xml",
    "xsd",
    "zlib",
    "\u03b1",
    "\u03b3",
    "\u03b5",
    "\u03bc",
    "\u03bcg",
    "\u03bd",
    "\u03c1"
]


...