pax_global_header00006660000000000000000000000064146375037550014531gustar00rootroot0000000000000052 comment=230ca152faaa03685afeb8b9f11436749ccc6cc0 ChemOnomatopist-0.10.0/000077500000000000000000000000001463750375500147205ustar00rootroot00000000000000ChemOnomatopist-0.10.0/.gitignore000066400000000000000000000000731463750375500167100ustar00rootroot00000000000000.build ChemOnomatopist-* container.sif nytprof nytprof.out ChemOnomatopist-0.10.0/Changes000066400000000000000000000061551463750375500162220ustar00rootroot000000000000000.10.0 2024-06-28 - Implement support for acyl halides. - Implement support for chalcogen chains. - Implement support for nitramides. - Implement support for noncarbonic oxoacids. - Implement support for sulfimides, sulfinamides and sulfonamides. - Postprocess molecular graphs after main chain identification. - Add nonstandard valences to side chains. 0.9.0 2024-05-30 - Implement support for a(ba)_n chains. - Implement support for ace...ylenes. - Implement support for picenes. - Implement support for salts. - Implement rules to prefer charged atoms. - Implement rules for rings. - Extend the definition for amides. - Extend the definition for carboxylic, sulfinic and sulfonic acids. - Extend the definition for sulfinyl and sulfonyl groups. - Rewrite handling of indicated hydrogen atoms. - Rewrite rules for isotopes. - Implement '--opsin' and '--isomorphism' command line options in 'chemonomatopist' executable. 0.8.0 2024-04-09 - 30% correctness rate for all PINs from the Blue Book (2013). - Implement von Baeyer names. - Implement support for peroxide. - Implement parent chain selection rules based on indicated hydrogens and isotopes. - Better purine detection. - Better carboxyl naming. - Better ester naming. 0.7.0 2024-02-23 - 27.7% correctness rate for all PINs from the Blue Book (2013). - Implement nonstandard valences. - Implement support for fluorene. - Implement support for polyhelicene. - Implement support for amidines. - Implement support for diazenes. - Implement support for isocyanates and isocyanides. - Implement support for urea. - Fix issues with naming carbaldehydes, carbonitriles and carboxyls. 0.6.1 2023-10-31 - Implement partial support for porphyrins. - Implement support for anilino groups. - Simplify detection of ring membership. - Fix issues with cyclic amides. 0.6.0 2023-10-13 - Implement support for polyacenes, polyaphenes, xanthenes and similar compounds. - Implement support for amides, ethers, hydrazides, hydrazines and sulfo groups. - Fix many issues with parent chain detection. - Implement '--debug' mode in 'chemonomatopist' executable. 0.5.0 2023-06-16 - Implement support for bicyclic compounds. - Implement support for ketone analogues, nitro, nitroso and XO3 groups. - Implement partial support for guanidine. - Implement '--check' mode in 'chemonomatopist' executable. 0.4.0 2023-05-30 - Implement support for monocycles and monospiro compounds. - Implement support for some nitrogen- and sulfur-based compound classes. 0.3.0 2023-05-10 - 99.6% correctness rate for unbranched and branched saturated acyclic hydrocarbons for PubChem database downloaded on 2022-04-21. - Implement support for some compound classes having oxygen atoms. - Implement support for heteroatoms. 0.2.0 2022-05-25 - 97.5% correctness rate for unbranched and branched saturated acyclic hydrocarbons for PubChem database downloaded on 2022-04-21. 0.1.0 2022-01-18 - Initial release. ChemOnomatopist-0.10.0/LICENSE000066400000000000000000000027331463750375500157320ustar00rootroot00000000000000Copyright (c) The Regents of the University of California. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the University nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ChemOnomatopist-0.10.0/README.md000066400000000000000000000027561463750375500162110ustar00rootroot00000000000000# ChemOnomatopist *ChemOnomatopist* is a tool to derive IUPAC systematic names for chemical structures: $ echo C=C1C=CC=C1 | chemonomatopist C=C1C=CC=C1 5-methylidenecyclopenta-1,3-diene *ChemOnomatopist* analyses chemical graphs to determine IUPAC names according to the [Nomenclature of Organic Chemistry. IUPAC Recommendations and Preferred Names 2013](https://iupac.qmul.ac.uk/BlueBook/PDF/BlueBookV3.pdf), also known as the Blue Book. ## Installation *ChemOnomatopist* is written in Perl. The easiest way to install it is using [Dist::Zilla](https://metacpan.org/release/Dist-Zilla): $ git clone https://github.com/merkys/ChemOnomatopist $ dzil install ## Support This tool is under development. Support for the variety of organic chemical compounds is underway. Currently *ChemOnomatopist* supports: * Branched acyclic compounds * Monocycles and monospiro compounds * Bicyclic compounds * Regular polycylic aromatic hydrocarbons (polyacenes, polyaphenes, xanthenes and others) Issues known to produce names deviating from IUPAC guidelines: * Possibly incorrect seniority for *tert* substituents (Blue Book is not quite clear about them) * Incorrect addition/omission of unambiguous locants and parentheses * Monocycles with multiple substituents are sometimes named incorrectly * Linear hetero compounds composed of repeating units are usually named incorrectly ## Contributors * Miglė Urbonaitė ## License *ChemOnomatopist* is free software licensed under BSD-3-Clause license. ChemOnomatopist-0.10.0/bin/000077500000000000000000000000001463750375500154705ustar00rootroot00000000000000ChemOnomatopist-0.10.0/bin/chemonomatopist000077500000000000000000000074041463750375500206340ustar00rootroot00000000000000#!/usr/bin/perl use strict; use warnings; # VERSION use ChemOnomatopist; use File::Basename qw( basename ); use Getopt::Long::Descriptive; use List::Util qw( sum ); =head1 NAME chemonomatopist - derive IUPAC systematic names for chemical structures =head1 SYNOPSYS chemonomatopist [args] [files] =head1 DESCRIPTION ChemOnomatopist analyses chemical graphs to determine IUPAC names according to the "Nomenclature of Organic Chemistry. IUPAC Recommendations and Preferred Names 2013", also known as the Blue Book. =cut my $basename = basename $0; my( $opt, $usage ) = describe_options( <<"END" . 'OPTIONS', USAGE $basename [] [] DESCRIPTION $basename derives IUPAC systematic names for chemical structures END [ 'check', 'treat the input as list of SMILES and IUPAC names and check whether given and generated names match' ], [ 'opsin', 'along \'check\', attempt parsing own-generated IUPAC names with OPSIN (requires SmilesScripts::OPSIN Perl module)' ], [ 'isomorphism', 'along \'check\' and \'opsin\', perform isomorphism checks for nonmatching chemical names (requires SmilesScripts::Isomorphism Perl module)' ], [], [ 'cautious', 'avoid experimental features, refuse processing only partially supported compounds' ], [ 'debug', 'turn on the debug mode' ], [], [ 'help', 'print usage message and exit', { shortcircuit => 1 } ], [ 'version', 'print version and exit', { shortcircuit => 1 } ], ); if( $opt->help ) { print $usage->text; exit; } if( $opt->version ) { print $ChemOnomatopist::VERSION, "\n"; exit; } my $opsin; if( $opt->opsin || $opt->isomorphism ) { require SmilesScripts::OPSIN; $opsin = SmilesScripts::OPSIN->new; } $ChemOnomatopist::CAUTIOUS = 1 if $opt->cautious; $ChemOnomatopist::DEBUG = 1 if $opt->debug; my %counts = ( OK => 0, FAIL => 0, ERROR => 0 ); local $\ = "\n"; while (<>) { chomp; my( $id, $given_name, $SMILES ); if( $opt->check ) { ( $id, $given_name, $SMILES ) = split "\t", $_; } else { $SMILES = $_; } my $derived_name; eval { $derived_name = ChemOnomatopist::get_name( $SMILES ); }; $@ =~ s/\n$// if $@; if( $opt->check ) { if( $@ ) { print $id, "\t", 'ERROR', "\t", $@; $counts{ERROR}++; } elsif( $given_name eq $derived_name ) { print $id, "\t", 'OK', "\t", $derived_name; $counts{OK}++; } elsif( $opt->isomorphism ) { require SmilesScripts::Isomorphism; my $SMILES_derived = $opsin->IUPAC_to_SMILES( "$derived_name" ); if( !defined $SMILES_derived ) { print $id, "\t", 'MISPARSE', "\t", $derived_name; $counts{MISPARSE}++; next; } my $reason = SmilesScripts::Isomorphism::smi_compare( $SMILES, $SMILES_derived ); if( $reason eq 'isomorphic' ) { print $id, "\t", 'ALT', "\t", $given_name, "\t", $derived_name; $counts{ALT}++; } else { print $id, "\t", 'FAIL', "\t", $given_name, "\t", $derived_name; $counts{FAIL}++; } } elsif( $opt->opsin && !defined $opsin->IUPAC_to_SMILES( "$derived_name" ) ) { print $id, "\t", 'MISPARSE', "\t", $derived_name; $counts{MISPARSE}++; } else { print $id, "\t", 'FAIL', "\t", $given_name, "\t", $derived_name; $counts{FAIL}++; } } else { if( $@ ) { print STDERR $SMILES, "\t", $@; } else { print $SMILES, "\t", $derived_name; } } } if( $opt->check ) { print ''; for (reverse sort keys %counts) { print "$_:\t", $counts{$_}; } print "TOTAL:\t", sum values %counts; } ChemOnomatopist-0.10.0/container.def000066400000000000000000000011241463750375500173600ustar00rootroot00000000000000# Definition for an Apptainer/Singularity container used to check for missing dependencies. Bootstrap: docker From: debian:unstable %post apt-get update apt-get install --yes libalgorithm-combinatorics-perl cpanminus git libchemistry-opensmiles-perl libdbi-perl libdist-zilla-perl libdist-zilla-plugin-autometaresources-perl libdist-zilla-plugin-ourpkgversion-perl libgraph-perl libgraph-moreutils-perl libgraph-nauty-perl libipc-run3-perl libset-object-perl cpanm Graph::Grammar git clone https://github.com/merkys/ChemOnomatopist cd ChemOnomatopist dzil test --noauth ChemOnomatopist-0.10.0/dist.ini000066400000000000000000000016101463750375500163620ustar00rootroot00000000000000name = ChemOnomatopist author = Andrius Merkys license = BSD copyright_holder = Andrius Merkys copyright_year = 2021-2024 version = 0.10.0 [@Filter] -bundle = @Basic -remove = License [AutoMetaResources] homepage = https://search.cpan.org/dist/%{dist} repository.github = user:merkys bugtracker.github = user:merkys [MetaJSON] [OurPkgVersion] [Prereqs] Algorithm::Combinatorics = 0 Chemistry::Isotope = 0 Chemistry::OpenSMILES = 0.8.6 Clone = 0 Getopt::Long::Descriptive = 0 Graph = 0.9726 Graph::Grammar = 0 Graph::MoreUtils = 0.2.0 Graph::Nauty = 0 List::Util = 0 Scalar::Util = 0 Set::Object = 0 [Prereqs / Test] -phase = test Algorithm::Combinatorics = 0 DBI = 0 IPC::Run3 = 0 Test::More = 0 ChemOnomatopist-0.10.0/lib/000077500000000000000000000000001463750375500154665ustar00rootroot00000000000000ChemOnomatopist-0.10.0/lib/ChemOnomatopist.pm000066400000000000000000002076621463750375500211520ustar00rootroot00000000000000package ChemOnomatopist; # ABSTRACT: Give molecule a name # VERSION use strict; use warnings; use ChemOnomatopist::Chain; use ChemOnomatopist::Chain::Aceylene; use ChemOnomatopist::Chain::Amide; use ChemOnomatopist::Chain::Amine; use ChemOnomatopist::Chain::Bicycle; use ChemOnomatopist::Chain::Carboxamide; use ChemOnomatopist::Chain::Circular; use ChemOnomatopist::Chain::Ether; use ChemOnomatopist::Chain::Fluorene; use ChemOnomatopist::Chain::FromHalves; use ChemOnomatopist::Chain::Imine; use ChemOnomatopist::Chain::Monocycle; use ChemOnomatopist::Chain::Monospiro; use ChemOnomatopist::Chain::Phenanthrene; use ChemOnomatopist::Chain::Picene; use ChemOnomatopist::Chain::Polyacene; use ChemOnomatopist::Chain::Polyaphene; use ChemOnomatopist::Chain::Polyhelicene; use ChemOnomatopist::Chain::Porphyrin; use ChemOnomatopist::Chain::VonBaeyer; use ChemOnomatopist::Chain::Xanthene; use ChemOnomatopist::ChainHalf; use ChemOnomatopist::Comparable::Array::Numeric; use ChemOnomatopist::Comparable::Array::Isotope::By::AtomicNumber; use ChemOnomatopist::Comparable::Array::Isotope::By::MassNumber; use ChemOnomatopist::Elements qw( %elements ); use ChemOnomatopist::Grammar qw( parse_molecular_graph ); use ChemOnomatopist::Group; use ChemOnomatopist::Group::Amide; use ChemOnomatopist::Group::Amine; use ChemOnomatopist::Group::Ether; use ChemOnomatopist::Group::Imine; use ChemOnomatopist::Group::Sulfinyl; use ChemOnomatopist::MolecularGraph; use ChemOnomatopist::Name; use ChemOnomatopist::Name::Part::Stem; use ChemOnomatopist::Util qw( all_max all_min cmp_arrays copy zip ); use ChemOnomatopist::Util::Graph qw( BFS_calculate_chain_length BFS_is_chain_branched cyclic_components graph_center graph_cycle_core graph_cycles graph_has_cycle graph_longest_paths_from_vertex graph_path_between_vertices graph_without_edge_attributes subgraph tree_branch_positions tree_number_of_branches ); use ChemOnomatopist::Util::SMILES qw( cycle_SMILES_explicit ); use Chemistry::OpenSMILES qw( %bond_symbol_to_order is_double_bond is_single_bond is_triple_bond ); use Graph::MoreUtils qw( SSSR graph_replace ); use Graph::Nauty qw( are_isomorphic ); use Graph::Traversal::DFS; use Graph::Undirected; use List::Util qw( all any first max min pairs uniq ); use Scalar::Util qw( blessed ); use Set::Object qw( set ); no warnings 'recursion'; our $CAUTIOUS = ''; our $DEBUG = ''; sub get_name { my( $what ) = @_; # Detect the type of the input data my( $graph ); if( blessed $what && $what->isa( Graph::Undirected:: ) ) { $graph = ChemOnomatopist::MolecularGraph->new( $what ); } else { # Assume SMILES string die "cannot handle stereochemistry now\n" if $what =~ /[\\\/@]/; require Chemistry::OpenSMILES::Parser; my $parser = Chemistry::OpenSMILES::Parser->new; my @graphs = map { ChemOnomatopist::MolecularGraph->new( $_ ) } $parser->parse( $what ); # Detecting and naming salts if( !$CAUTIOUS && @graphs == 2 && (any { $_->is_anion } @graphs) && (any { $_->is_cation } @graphs) ) { return join ' ', map { get_name( $_ ) } sort { $a->is_anion <=> $b->is_anion } @graphs; } die "separate molecular entities are not handled yet\n" if @graphs > 1; $graph = shift @graphs; } die "nothing supplied for get_name()\n" unless $graph; if( any { $_ ne 'H' && !exists $elements{$_} } map { ucfirst $_->{symbol} } $graph->vertices ) { die "unknown elements detected\n"; } find_groups( $graph ); my $main_chain = select_mainchain( $graph ); return get_mainchain_name( $graph, $main_chain ); } # get_sidechain_name() receives a graph and a position to start the chain in it. # From that position it finds the longest chain and returns the constructed name. # $parent is the out-of-chain atom from which this chain has been reached. # $start is the in-chain atom having a bond with $parent. sub get_sidechain_name { my( $graph, $parent, $start ) = @_; # Record the type of parent bond my $parent_bond = '-' if $parent; if( blessed $start && $start->isa( ChemOnomatopist::Chain:: ) ) { my $attachment_point = first { $graph->has_edge( $parent, $_ ) } $start->vertices; if( $attachment_point && $graph->has_edge_attribute( $parent, $attachment_point, 'bond' ) ) { $parent_bond = $graph->get_edge_attribute( $parent, $attachment_point, 'bond' ); } } else { if( $graph->has_edge_attribute( $parent, $start, 'bond' ) ) { $parent_bond = $graph->get_edge_attribute( $parent, $start, 'bond' ); } } # Groups that cannot be included in the chain do not matter my $branches_at_start = grep { !blessed $_ || ( element( $_ ) && element( $_ ) eq 'C' ) } grep { !$parent || $_ != $parent } $graph->neighbours( $start ); my $chain; if( blessed $start && $start->isa( ChemOnomatopist::Chain:: ) ) { $chain = $start; $chain->parent( $parent ) if $parent; } elsif( $graph->groups( $start ) ) { ( $chain ) = $graph->groups( $start ); $chain->parent( $parent ) if $parent; } else { $chain = select_sidechain( $graph, $parent, $start ); } my @chain = $chain->vertices; # Handle non-carbon substituents, according to BBv3 P-29.3.1 and P-68.2.2 if( @chain == 1 && $graph->degree( @chain ) == 0 + defined $parent && !blessed $chain[0] && element( $chain[0] ) ne 'C' && exists $elements{element( $chain[0] )} ) { my $symbol = element( $chain[0] ); my $element = $elements{$symbol}->{prefix}; my $name = ChemOnomatopist::Name->new; if( $chain->nonstandard_valences ) { $name->append_locants( map { 'λ' . $_ } $chain->nonstandard_valences ); } if( $symbol eq 'Al' ) { $element = 'aluman'; } elsif( $symbol eq 'As' ) { $element = 'arsan'; } elsif( $symbol eq 'P' ) { $element = 'phosphan'; } elsif( $symbol eq 'S' ) { $element = 'sulfan'; } elsif( $symbol ne 'B' ) { $element =~ s/a$//; } if( $symbol =~ /^(Cl|Br|F|I)$/ ) { $element .= 'o'; } else { $element .= 'yl'; } $element .= 'idene' if $parent_bond eq '='; $element .= 'idyne' if $parent_bond eq '#'; if( exists $chain[0]->{isotope} ) { $name .= '(' . $chain[0]->{isotope} . $symbol . ')'; } return $name->append_element( $element ); } # Collect heteroatoms my %heteroatoms; for (pairs zip $chain->heteroatoms, $chain->heteroatom_positions) { my( $element, $i ) = @$_; push @{$heteroatoms{$element}}, $i; } # Examine the attachments to the main chain: delete the edges # connecting them to the main chain, at the same time giving them # names according to their lengths via calls to get_sidechain_name() my %attachments; for my $i (0..$#chain) { my $atom = $chain[$i]; for my $neighbour ($graph->neighbours( $atom )) { next if any { $neighbour == $_ } @chain; # Skip atoms from this chain next if $parent && $neighbour == $parent; my $attachment = get_sidechain_name( $graph, $atom, $neighbour ); if( $chain->needs_ane_suffix && $attachment =~ /yl$/ ) { # FIXME: Properly detect the 'yl' suffix and replace it with suffix object $attachment->pop_yl; $attachment .= $attachment eq 'hydrazin' ? 'e' : 'ane'; } $attachments{$attachment} = [ $attachment ] unless $attachments{$attachment}; push @{$attachments{$attachment}}, $i; } } # Collecting names of all the attachments my $name = ChemOnomatopist::Name->new; my @order = sort { $a cmp $b } keys %attachments; for my $i (0..$#order) { my $attachment_name = $order[$i]; my( $attachment, @positions ) = @{$attachments{$attachment_name}}; if( $chain->needs_substituent_locants ) { $name->append_locants( $chain->locants( @positions ) ); } if( @positions > 1 ) { my $number = IUPAC_numerical_multiplier( scalar @positions ); $number .= 'a' unless $number =~ /^(|\?|.*i)$/; $name->append_multiplier( $number ); # FIXME: More rules from BBv3 P-16.3.4 should be added if( $attachment->has_substituent_locant || # BBv3 P-16.3.4 (a) $attachment->starts_with_multiplier || # BBv3 P-16.3.4 (c) $attachment =~ /^dec/ || # BBv3 P-16.3.4 (d) $attachment =~ /^[0-9]/ ) { $attachment->bracket; } } else { if( !$chain->isa( ChemOnomatopist::Chain::Ether:: ) && !$attachment->is_enclosed && (!$attachment->is_simple || $attachment->starts_with_locant) ) { # BBv3 P-16.5.1.1 $attachment->bracket; } } # Enclose all but the first attachment in a chain not needing substituent locants if( !$attachment->is_enclosed && !$chain->needs_substituent_locants && @order > 1 && $i > 0 ) { $attachment->bracket; } $name .= $attachment; } $name .= $chain->indicated_hydrogens_part; # Collecting names of all heteroatoms for my $element (sort { $elements{$a}->{seniority} <=> $elements{$b}->{seniority} } keys %heteroatoms) { if( $chain->needs_heteroatom_locants ) { $name->append_locants( $chain->locants( @{$heteroatoms{$element}} ) ); } if( $chain->needs_heteroatom_names ) { if( @{$heteroatoms{$element}} > 1 ) { my $number = IUPAC_numerical_multiplier( scalar @{$heteroatoms{$element}} ); $number .= 'a' unless $number =~ /^(|\?|.*i)$/; $name .= $number; } if( $element eq 'S' ) { $name->append_element( 'sulfan' ); } else { $name->append_element( $elements{$element}->{prefix} ); } } } $name .= $chain->isotope_part; # Record the parent $chain->parent( $parent ) if $chain->can( 'parent' ); if( $chain->isa( ChemOnomatopist::Chain::Circular:: ) || $chain->isa( ChemOnomatopist::Group:: ) || ($chain->isa( ChemOnomatopist::Chain::Amine:: ) && @chain == 1) || (blessed $chain eq ChemOnomatopist::Chain:: && @chain == 1 && blessed $chain[0]) ) { my $prefix = $chain->prefix; # All groups are most likely stems $prefix = ChemOnomatopist::Name->new( $prefix ) unless blessed $prefix; if( blessed $chain[0] && $chain[0]->isa( ChemOnomatopist::Group::Sulfinyl:: ) && $name =~ /yl$/ ) { # BBv2 P-63.6 $name->pop_yl; $name .= 'ane'; } $name .= $prefix; } else { if( $chain->isa( ChemOnomatopist::Chain::Ether:: ) ) { if( $name->has_substituent_locant && !$name->is_enclosed ) { $name->bracket; } else { $name->pop_yl; } } my $prefix = $chain->prefix; if( $prefix eq 'nyl' && $name->ends_with_element && $name->[-1] eq 'sila' ) { $name->[-1]{value} =~ s/a$//; } else { $name .= $prefix; $name->pop_yl; } if( $branches_at_start > 1 ) { my $branch_point = first { $chain[$_] == $start } 0..$#chain; if( $branch_point || !$chain->is_saturated ) { # According to BBv3 P-29.2 (1) $name .= 'an' unless $chain->number_of_double_bonds; $name->append_substituent_locant( $chain->locants( $branch_point ) ); } } elsif( $chain->is_hydrocarbon && $chain->number_of_multiple_bonds && $chain->needs_multiple_bond_locants ) { $name->append_substituent_locant( $chain->locants( 0 ) ); } $name .= 'yl' unless $name =~ /[oy]$/; } if( $chain->needs_multiple_bond_suffix ) { $name .= 'idene' if $parent_bond && $parent_bond eq '='; $name .= 'idyne' if $parent_bond && $parent_bond eq '#'; } $name = ChemOnomatopist::Name->new( 'acetyl' ) if $name eq '1-oxoethyl'; $name = ChemOnomatopist::Name->new( 'benzyl' ) if $name eq 'phenylmethyl'; # Detecting anilino- group if( @$name >= 2 && $name->[-2] eq 'phenyl' && $name->[-1] eq 'amino' ) { pop @$name; pop @$name; $name .= 'anilino'; } return $name; } sub get_mainchain_name { my( $graph, $chain ) = @_; my @vertices = $graph->vertices; my @chain = $chain->vertices; my @groups = most_senior_groups( $graph ); # The following condition adjusts the seniority order by moving ethers below cycles if( @groups && (all { $_->isa( ChemOnomatopist::Group::Ether:: ) } @groups) && $chain->isa( ChemOnomatopist::Chain::Circular:: ) ) { @groups = ( $chain ); } # Collect heteroatoms and nonstandard bonding numbers from the chain my %heteroatoms; for (pairs zip $chain->heteroatoms, $chain->heteroatom_positions) { my( $element, $i ) = @$_; push @{$heteroatoms{$element}}, $i; } my %nonstandard_valences; for (pairs zip $chain->nonstandard_valences, $chain->nonstandard_valence_positions) { my( $valence, $i ) = @$_; $nonstandard_valences{$i} = $valence; } # Collect the substituents my %attachments; my @senior_group_attachments; for my $i (0..$#chain) { my $atom = $chain[$i]; for my $neighbour ($graph->neighbours( $atom )) { next if any { $_ == $neighbour } @chain; # Skip atoms from this chain my( $group ) = $graph->groups( $neighbour ); if( grep { $_ == $neighbour } @groups ) { push @senior_group_attachments, $i; } else { my $attachment = get_sidechain_name( $graph, $atom, $group ? $group : $neighbour ); if( $chain->needs_ane_suffix ) { if( $attachment eq 'phenyl' ) { $attachment = ChemOnomatopist::Name->new( 'benzene' ); } elsif( $attachment =~ /yl$/ ) { # FIXME: Properly detect the 'yl' suffix and replace it with suffix object $attachment->pop_yl; $attachment .= $attachment eq 'hydrazin' ? 'e' : 'ane'; } } $attachments{$attachment} = [ $attachment ] unless $attachments{$attachment}; push @{$attachments{$attachment}}, $i; } } } # Collecting names of all the attachments my @order = sort { cmp_only_alphabetical( $a, $b ) || $a cmp $b } keys %attachments; my $name = ChemOnomatopist::Name->new; for my $i (0..$#order) { my $attachment_name = $order[$i]; my( $attachment, @positions ) = @{$attachments{$attachment_name}}; if( $chain->needs_substituent_locants ) { $name->append_locants( $chain->locants( @positions ) ); } if( @positions > 1 ) { my $number; if( $attachment =~ /^bi/ || ( $attachment->is_simple && !$attachment->starts_with_multiplier && $attachment !~ /^sulfanyl/ ) ) { # BBv3 P-16.3.6 (b) $number = IUPAC_numerical_multiplier( scalar @positions ); $number .= 'a' unless $number =~ /^(|\?|.*i)$/; } else { $number = IUPAC_complex_numerical_multiplier( scalar @positions ); } $name .= $number; # BBv2 P-16.3.4 (a) if( !$attachment->is_enclosed && ( $attachment =~ /^dec/ || # BBv2 P-16.3.4 (d) $attachment =~ /^sulfanyl/ || # BBv3 P-16.3.6 (b) !$attachment->is_simple || $attachment->has_isotope || $attachment->has_substituent_locant ) ) { $attachment->bracket; } } else { # This is an attempt to implement rules from P-16.5.1. # However, they are quite vague, thus there is not much of guarantee the following code is correct. if( !$attachment->is_enclosed && ($attachment->has_locant || !$attachment->is_simple) && $attachment ne 'tert-butyl' ) { $attachment->bracket; } } # FIXME: More rules from BBv2 P-16.3.4 and P-16.5.1 should be added if( !$attachment->is_enclosed && ( $attachment->starts_with_multiplier || # BBv2 P-16.3.4 (c) $attachment =~ /^[0-9]/ ) ) { $attachment->bracket; } if( !$attachment->is_enclosed && !$chain->needs_substituent_locants && $i > 0 ) { $attachment->bracket; } if( $chain->isa( ChemOnomatopist::Group::Amidine:: ) && !$attachment->is_enclosed && $i == $#order ) { # This is not nice, albeit works; have to look for a better solution. $attachment->[-1] =~ s/yl$/ane/; $attachment->[-1] =~ s/e$// if $chain->suffix =~ /^i/; } $name .= $attachment; } $name .= $chain->indicated_hydrogens_part; # Collecting names of all heteroatoms for my $element (sort { $elements{$a}->{seniority} <=> $elements{$b}->{seniority} } keys %heteroatoms) { my @locants; for my $i (@{$heteroatoms{$element}}) { my $locant = ''; if( $chain->needs_heteroatom_locants ) { ( $locant ) = $chain->locants( $i ); } if( exists $nonstandard_valences{$i} ) { $locant .= 'λ' . $nonstandard_valences{$i}; } push @locants, $locant unless $locant eq ''; } $name->append_locants( @locants ); if( $chain->needs_heteroatom_names ) { if( @{$heteroatoms{$element}} > 1 ) { my $number = IUPAC_numerical_multiplier( scalar @{$heteroatoms{$element}} ); $number .= 'a' unless $number =~ /^(|\?|.*i)$/; $name .= $number; } if( $element eq 'S' && all { element( $_ ) eq 'S' } $chain->vertices ) { $name->append_element( 'sulfa' ); } else { $name->append_element( $elements{$element}->{prefix} ); } } } $name .= $chain->isotope_part; $name .= $chain->suffix; if( @groups && all { !$_->isa( ChemOnomatopist::Chain:: ) } @groups ) { # If the most senior group is carbon, thus it is in the chain as well my $groups = set( grep { element( $_ ) && element( $_ ) eq 'C' } @groups ); for (0..$#chain) { push @senior_group_attachments, $_ if $groups->has( $chain[$_] ); } # Terminal locants are not cited for 1 or 2 senior group attachments according to BBv2 P-14.3.4.1 if( $chain->needs_suffix_locant ) { $name->append_locants( sort { cmp_locants( $a, $b ) } $chain->locants( @senior_group_attachments ) ); } my $suffix; if( $chain->isa( ChemOnomatopist::Chain::Circular:: ) ) { $suffix = $groups[0]->suffix_if_cycle_substituent; } elsif( @senior_group_attachments > 2 ) { $suffix = $groups[0]->multisuffix; } else { $suffix = $groups[0]->suffix; } if( @senior_group_attachments > 1 && blessed $suffix && $suffix->starts_with_multiplier ) { $suffix->bracket; } my $number; if( blessed $suffix && $suffix->is_enclosed ) { $number = IUPAC_complex_numerical_multiplier( scalar @senior_group_attachments ); } else { $number = IUPAC_numerical_multiplier( scalar @senior_group_attachments ); $number = '' if $number eq 'mono'; $number .= 'a' unless $number =~ /^(|\?|.*i)$/; } $name->append_multiplier( $number ); $name->append_suffix( $suffix ); } my $charge_part = $chain->charge_part; if( "$charge_part" ) { if( !$charge_part->has_multiplier ) { $name->pop_e; pop @$name if $name->ends_with_alkane_an_suffix; } $name .= $charge_part; } $name =~ s/benzen(-1-)?ol$/phenol/; $name = 'anisole' if $name eq 'methoxybenzene'; # BBv2 P-63.2.4.1 $name = 'benzoic acid' if $name eq 'benzenecarboxylic acid'; # BBv2 P-65.1.1.1 $name = 'acetylene' if $name eq 'ethyne'; # BBv3 P-52.2.1.1 $name = 'toluene' if $name eq 'methylbenzene'; $name =~ s/^(\d,\d-)dimethylbenzene$/$1xylene/; $name = 'formic acid' if $name eq 'methanoic acid'; $name =~ s/methanamide$/formamide/; $name =~ s/ethanamide$/acetamide/; $name =~ s/ethan(-1-)?o(ate|ic acid)$/acet$2/; # BBv2 P-65.1.1.1 $name =~ s/benzen(-1-)?amine$/aniline/; $name =~ s/(benz)(ene-1-carb)(aldehyde|onitrile)$/$1$3/; $name =~ s/benzene-1-carboxylic acid$/benzoic acid/; return $name; } sub find_groups { my( $graph ) = @_; # Attaching hydrogen atoms to their heavier neighbours for my $H (grep { is_element( $_, 'H' ) } $graph->vertices) { die "cannot handle shared hydrogen atoms\n" if $graph->degree( $H ) > 1; my( $parent ) = $graph->neighbours( $H ); die "cannot handle compounds with H-H bonds\n" if is_element( $parent, 'H' ); $parent->{hcount} = 0 unless exists $parent->{hcount}; $parent->{hcount}++; push @{$parent->{h_isotope}}, $H->{isotope}; $graph->delete_vertex( $H ); } # Recording nonstandard bonding numbers for my $atom ($graph->vertices) { next unless exists $elements{element( $atom )}->{standard_bonding_number}; my $valence = 0; $valence += $atom->{charge} if $atom->{charge}; $valence += $atom->{hcount} if $atom->{hcount}; for my $neighbour ($graph->neighbours( $atom )) { my $order = 1; if( $graph->has_edge_attribute( $atom, $neighbour, 'bond' ) ) { $order = $bond_symbol_to_order{$graph->get_edge_attribute( $atom, $neighbour, 'bond' )}; } $valence += $order; } next if $valence == $elements{element( $atom )}->{standard_bonding_number}; $atom->{valence} = $valence; } # Detecting cyclic compounds my @ring_systems = cyclic_components( $graph ); # Aromatising mancudes - experimental for my $core (@ring_systems) { my %valences; for my $edge ($core->edges) { my $order = 1; if( $core->has_edge_attribute( @$edge, 'bond' ) ) { $order = $bond_symbol_to_order{$core->get_edge_attribute( @$edge, 'bond' )}; } $valences{$edge->[0]} += $order; $valences{$edge->[1]} += $order; } # Check if an elementary cycle could be regarded as aromatic my %uniq = map { join( '', sort @$_ ) => $_ } SSSR( $core, 8 ); my @aromatic; for my $cycle (values %uniq) { # All atoms in a cyclic compound must not have more than 3 neighbours next if any { $core->degree( $_ ) > 3 } @$cycle; my @v2 = grep { $core->degree( $_ ) == 2 } @$cycle; my @v3 = grep { $core->degree( $_ ) == 3 } @$cycle; # There can be only one nonaromatic atom in a cycle my @nonaromatic_atoms = grep { $valences{$_} < 3 } @v2; if( @nonaromatic_atoms ) { next if @nonaromatic_atoms > 1; next unless element( $nonaromatic_atoms[0] ) =~ /^[NOPS]$/; } push @aromatic, [ Graph::Traversal::DFS->new( subgraph( $core, @$cycle ) )->dfs ]; } # Mark selected cycles as aromatic for my $cycle (@aromatic) { my @vertices = @$cycle; for (0..$#vertices) { $graph->set_edge_attribute( $vertices[$_], $vertices[($_ + 1) % @vertices], 'bond', ':' ); if( $vertices[$_]->{symbol} =~ /^(Se|As|[BCNOPS])$/ ) { $vertices[$_]->{symbol} = lcfirst $vertices[$_]->{symbol}; } } } } # Identifying ring systems, any unknown ring system terminates the naming for my $core (@ring_systems) { my %vertices_by_degree; for my $vertex ($core->vertices) { my $degree = $core->degree( $vertex ); $vertices_by_degree{$degree} = [] unless $vertices_by_degree{$degree}; push @{$vertices_by_degree{$degree}}, $vertex; } my $compound; if( join( ',', sort keys %vertices_by_degree ) eq '2' ) { # Monocycles $compound = ChemOnomatopist::Chain::Monocycle->new( $graph, Graph::Traversal::DFS->new( $core )->dfs ); } elsif( ChemOnomatopist::Chain::Monospiro->has_form( $core ) ) { # BBv2 P-24.2.1 Monospiro alicyclic ring systems $compound = ChemOnomatopist::Chain::Monospiro->new( $graph, $core->vertices ); } elsif( ChemOnomatopist::Chain::Bicycle->has_form( $core ) ) { # Ortho-fused as defined in BBv2 P-25.3.1.1.1 $compound = ChemOnomatopist::Chain::Bicycle->new( $graph, $core->vertices ); } elsif( ChemOnomatopist::Chain::Porphyrin->has_form( $core ) ) { # Porphyrin $compound = ChemOnomatopist::Chain::Porphyrin->new( $graph, $core->vertices ); } elsif( join( ',', sort keys %vertices_by_degree ) eq '2,3' ) { # Fused ring systems of three or more rings # Graph::MoreUtils::SSSR v0.1.0 does not know how to return unique rings my %uniq = map { join( '', sort @$_ ) => $_ } SSSR( $core, 8 ); my @cycles = map { ChemOnomatopist::Chain::Monocycle->new( copy $graph, Graph::Traversal::DFS->new( $_ )->dfs ) } map { subgraph( $core, @$_ ) } values %uniq; if( (grep { $_->is_benzene } @cycles) == 2 && (grep { !$_->is_benzene } @cycles) == 1 && (all { $_->length == 6 } @cycles) && (any { !$_->is_hydrocarbon } @cycles) && are_isomorphic( graph_without_edge_attributes( $core ), ChemOnomatopist::Chain::Xanthene->ideal_graph, sub { return 'C' } ) ) { $compound = ChemOnomatopist::Chain::Xanthene->new( $graph, @cycles ); } elsif( ChemOnomatopist::Chain::Aceylene->has_form( $core ) ) { $compound = ChemOnomatopist::Chain::Aceylene->new( $graph, @cycles ); } elsif( @cycles == 3 && (grep { $_->is_benzene } @cycles) == 2 && (grep { $_->is_hydrocarbon && $_->length == 5 } @cycles) && ChemOnomatopist::Chain::Fluorene->has_form( $core ) ) { $compound = ChemOnomatopist::Chain::Fluorene->new( $graph, @cycles ); } elsif( @cycles >= 3 && (all { $_->length == 6 && $_->is_hydrocarbon } @cycles) && ChemOnomatopist::Chain::Polyacene->has_form( $core ) ) { $compound = ChemOnomatopist::Chain::Polyacene->new( $graph, @cycles ); } elsif( @cycles == 3 && (all { $_->length == 6 } @cycles) && are_isomorphic( graph_without_edge_attributes( $core ), ChemOnomatopist::Chain::Phenanthrene->ideal_graph, sub { return 'C' } ) ) { $compound = ChemOnomatopist::Chain::Phenanthrene->new( $graph, @cycles ); } elsif( @cycles >= 4 && (all { $_->length == 6 && $_->is_hydrocarbon } @cycles) && ChemOnomatopist::Chain::Polyaphene->has_form( $core ) ) { $compound = ChemOnomatopist::Chain::Polyaphene->new( $graph, @cycles ); } elsif( ChemOnomatopist::Chain::Picene->has_form( $core ) ) { $compound = ChemOnomatopist::Chain::Picene->new( $graph, @cycles ); } elsif( ChemOnomatopist::Chain::Polyhelicene->has_form( $core ) ) { $compound = ChemOnomatopist::Chain::Polyhelicene->new( $graph, @cycles ); } elsif( ChemOnomatopist::Chain::VonBaeyer->has_form( $core ) ) { $compound = ChemOnomatopist::Chain::VonBaeyer->new( $graph, $core->vertices ); } else { die "cannot handle complicated cyclic compounds\n"; } } else { die "cannot handle complicated cyclic compounds\n"; } $graph->add_group( $compound ); } parse_molecular_graph( $graph ); if( $DEBUG ) { for (sort map { ref $_ } grep { blessed $_ } $graph->groups) { print $_; } for (sort map { ref $_ } grep { blessed $_ } $graph->vertices) { print $_; } } # Charges are not handled yet if( $CAUTIOUS && any { !blessed $_ && exists $_->{charge} } $graph->vertices ) { die "cannot handle charges for now\n"; } # Safeguarding against multiple cycle amides sharing the same amido group. # Otherwise this may lead to endless loops. my @amides_in_carboxamides = map { $_->{amide} } grep { $_->isa( ChemOnomatopist::Chain::Carboxamide:: ) } $graph->groups; if( @amides_in_carboxamides > uniq @amides_in_carboxamides ) { die "cannot process multiple cycle amides sharing the same amide group\n"; } # Safeguarding against multiple urea groups which as well leads to endless loops. # Failcase: 2,4-diimidotricarbonic diamide (from BBv2) if( (grep { $_->isa( ChemOnomatopist::Group::Urea:: ) } $graph->groups) > 1 ) { die "cannot process multiple urea groups\n"; } return; } # Derive the chemical element of atom or group representation sub element { my( $atom_or_group ) = @_; return undef unless ref $atom_or_group; if( !blessed $atom_or_group ) { die "unknown value '$atom_or_group' given for element()\n" unless ref $atom_or_group eq 'HASH'; return ucfirst $atom_or_group->{symbol}; } if( $atom_or_group->isa( 'Chemistry::Atom' ) ) { # PerlMol Atom return $atom_or_group->symbol; } return $atom_or_group->element; } sub charge { my( $atom_or_group ) = @_; return undef unless ref $atom_or_group; if( !blessed $atom_or_group ) { die "unknown value '$atom_or_group' given for charge()\n" unless ref $atom_or_group eq 'HASH'; return exists $atom_or_group->{charge} ? $atom_or_group->{charge} : 0; } return $atom_or_group->charge; } # Check if an object or Perl hash is of certain chemical element sub is_element { my( $atom, $element ) = @_; return unless ref $atom; $element = ucfirst $element; if( blessed $atom ) { return element( $atom ) && element( $atom ) eq $element; } return ref $atom eq 'HASH' && exists $atom->{symbol} && ucfirst $atom->{symbol} eq $element; } # Given a graph, selects the main chain. # The returned chain is an object of ChemOnomatopist::Chain or its subclasses. # The selection is implemented according to P-41 sub select_mainchain { my( $graph ) = @_; my @POI; # Points of interest # "4. Anions" @POI = grep { $_->{charge} && $_->{charge} < 0 } $graph->vertices unless @POI; # "6. Cations" @POI = grep { $_->{charge} && $_->{charge} > 0 } $graph->vertices unless @POI; # POIs are atoms connected to the most senior groups, if any my @groups = most_senior_groups( $graph ); my $most_senior_group = blessed $groups[0] if @groups; if( $most_senior_group && $most_senior_group eq ChemOnomatopist::Group::Ether:: ) { $most_senior_group = undef; @groups = (); } if( !@POI ) { for my $group (@groups) { if( $group->is_part_of_chain ) { push @POI, $group; } elsif( $group->isa( ChemOnomatopist::Group::Amide:: ) ) { push @POI, $group->{parent}; } elsif( $group->isa( ChemOnomatopist::Group::Imine:: ) ) { push @POI, grep { is_double_bond( $graph, $group, $_ ) } $graph->neighbours( $group ); } else { push @POI, $graph->neighbours( $group ); } } @POI = uniq @POI; # FIXME: Actually, more than one group can be attached to the same vertex } # "Classes denoted by the senior atom in heterane nomenclature" if( !@POI ) { my $elements = set( map { element( $_ ) } grep { !blessed $_ || !$_->isa( ChemOnomatopist::Group::Ether:: ) } # Ethers are less senior grep { !blessed $_ || !$_->is_prefix_only } # Prefix-only groups cannot act as main chains $graph->vertices ); my( $most_senior_element ) = grep { $elements->has( $_ ) } qw( N P As Sb Bi Si Ge Sn Pb B Al Ga In Tl O S Se Te ); # C removed intentionally if( $most_senior_element ) { @POI = grep { defined element( $_ ) && element( $_ ) eq $most_senior_element } $graph->vertices; # BBv3 P-44.1.2: it seems that if there are multiple (hetero?)cycles they have to compete according to P-44.2 my @POI_cycles = grep { $_->isa( ChemOnomatopist::Chain::Circular:: ) } uniq map { $graph->groups( $_ ) } @POI; my $ncycles = grep { $_->isa( ChemOnomatopist::Chain::Circular:: ) } $graph->groups; @POI = () if @POI_cycles == 1 && $ncycles > 1; } } # "40. Carbon compounds: rings, chains" if( !@POI ) { @POI = grep { $_->isa( ChemOnomatopist::Chain::Circular:: ) } $graph->groups; } # "41. Ethers, then sulfides, sulfoxides, sulfones; then selenides, selenoxides, etc." if( !@POI ) { @groups = most_senior_groups( grep { $_->isa( ChemOnomatopist::Group::Ether:: ) } grep { blessed $_ } $graph->vertices ); @POI = @groups; $most_senior_group = blessed $groups[0] if @groups; } print STDERR '>>> most senior functional group: ' . ($most_senior_group ? $most_senior_group : '(none)') . "\n" if $DEBUG; my @parents = @POI; my @chains; if( @parents ) { # Select a chain containing most POIs # Prefer circular structures # FIXME: As per example in BBv3 P-63.7, chain with more principal groups than a ring is prefered. if( map { $graph->groups( $_ ) } @parents ) { @parents = uniq map { $graph->groups( $_ ) } @parents; } if( all { blessed $_ && $_->isa( ChemOnomatopist::Chain:: ) } @parents ) { @chains = map { $_->can( 'candidates' ) ? $_->candidates : $_ } @parents; } elsif( $most_senior_group && $most_senior_group->isa( ChemOnomatopist::Chain:: ) ) { @chains = map { $_->can( 'candidates' ) ? $_->candidates : $_ } @groups; } elsif( @parents == 1 ) { if( blessed $parents[0] && $parents[0]->can( 'candidates' ) ) { @chains = $parents[0]->candidates; } else { # As the starting position is known, it is enough to take the "sidechain" # containing this particular parent: my $copy = $graph->copy; if( @groups ) { # In order not to overrun groups in chain, all non-N groups are removed $copy->delete_vertices( grep { blessed $_ && (!element( $_ ) || element( $_ ) ne 'N') && $_ != $groups[0] && $_ != $parents[0] } $copy->vertices ); } my $chain = select_sidechain( $copy, (blessed $groups[0] && $groups[0]->is_terminal ? @groups : undef), @parents ); my @vertices = $chain->vertices; push @chains, ChemOnomatopist::Chain->new( $graph, undef, @vertices ); if( @vertices > 1 && !$chains[-1]->isa( ChemOnomatopist::Chain::Amine:: ) ) { # There is no use in reversing chains of single vertices. # ChemOnomatopist::Chain::Amine chains start with amine group, cannot be reversed. push @chains, ChemOnomatopist::Chain->new( $graph, undef, reverse @vertices ); } } } elsif( @parents ) { my $copy = $graph->copy; $copy->delete_vertices( grep { !blessed $_ && $copy->degree( $_ ) == 1 && element( $_ ) =~ /^(F|Cl|Br|I)$/ } $copy->vertices ); $copy->delete_vertices( map { $_->vertices } $copy->groups ); $copy->delete_vertices( grep { blessed $_ && !$_->is_part_of_chain } $copy->vertices ); my @paths; my $max_value; for my $i (0..$#parents) { for my $j (($i+1)..$#parents) { my @path = graph_path_between_vertices( $copy, $parents[$i], $parents[$j] ); next unless @path; my $value = (set( @parents ) * set( @path ))->size; if( !defined $max_value || $max_value < $value ) { @paths = ( \@path ); $max_value = $value; } elsif( $max_value == $value ) { push @paths, \@path; } } } # Maybe there is no path between any given pair of POIs. # If so, single-POI paths can be made. @paths = map { [ $_, $_ ] } @parents unless @paths; # Construct all chains having all possible extensions to both sides of the selected path my %longest_paths; for my $path (@paths) { my $copy = copy $graph; $copy->delete_path( @$path ); $copy->delete_vertices( grep { !blessed $_ && $copy->degree( $_ ) == 1 && element( $_ ) =~ /^(F|Cl|Br|I)$/ } $copy->vertices ); $copy->delete_vertices( map { $_->vertices } $copy->groups ); $copy->delete_vertices( grep { !is_element( $_, 'C' ) && !$_->is_part_of_chain } grep { blessed $_ } $copy->vertices ); my $A = shift @$path; my $B = pop @$path; if( !exists $longest_paths{$A} ) { $longest_paths{$A} = [ graph_longest_paths_from_vertex( $copy, $A ) ]; } if( !exists $longest_paths{$B} ) { $longest_paths{$B} = [ graph_longest_paths_from_vertex( $copy, $B ) ]; } for my $i (0..$#{$longest_paths{$A}}) { for my $j (0..$#{$longest_paths{$B}}) { if( $A == $B ) { if( $i == $j ) { push @chains, ChemOnomatopist::Chain->new( $graph, undef, @{$longest_paths{$A}->[$i]} ); next; } elsif( $longest_paths{$A}->[$i][1] == $longest_paths{$A}->[$j][1] ) { next; } # CHECKME: Maybe we are getting too many combinations? } push @chains, ChemOnomatopist::Chain->new( $graph, undef, reverse( @{$longest_paths{$A}->[$i]} ), @$path, @{$longest_paths{$B}->[$j]} ), ChemOnomatopist::Chain->new( $graph, undef, reverse( @{$longest_paths{$B}->[$j]} ), reverse( @$path ), @{$longest_paths{$A}->[$j]} ); } } } die "cannot determine the parent structure\n" unless @chains; # FIXME: This should probably be replaced by "most POIs" @chains = rule_most_groups( $most_senior_group, @chains ) if $most_senior_group; } elsif( @groups ) { # Attempt to build chains from functional groups @chains = map { ChemOnomatopist::Chain->new( $graph, undef, $_ ) } @groups; } else { die "cannot determine the parent structure\n"; } } elsif( $graph->groups ) { @chains = map { $_->can( 'candidates' ) ? $_->candidates : $_ } $graph->groups; # FIXME: This is a hack } else { # Here the candidate halves for the longest (and "best") path are placed in @path_parts. # Each of candidate halves start with center atom. my $subgraph = copy $graph; $subgraph->delete_vertices( grep { !blessed $_ && $subgraph->degree( $_ ) == 1 && element( $_ ) =~ /^(F|Cl|Br|I)$/ } $subgraph->vertices ); $subgraph->delete_vertices( grep { blessed $_ && $_->isa( ChemOnomatopist::Group:: ) && !$_->is_part_of_chain } $subgraph->vertices ); die "cannot determine the parent structure\n" unless $subgraph->is_connected; my @center = graph_center( $subgraph ); my @path_parts; if( @center == 1 ) { # Longest path has odd length for my $path ( graph_longest_paths_from_vertex( $subgraph, $center[0] ) ) { push @path_parts, ChemOnomatopist::ChainHalf->new( $graph, undef, @$path ); } } else { # Longest path has even length # Graph copy without center edge is required by graph_longest_paths_from_vertex() my $copy = copy $subgraph; $copy->delete_edge( @center ); for my $vertex ( @center ) { push @path_parts, map { ChemOnomatopist::ChainHalf->new( $graph, (grep { $_ ne $vertex } @center), @$_ ) } graph_longest_paths_from_vertex( $copy, $vertex ); } } if( @path_parts > 1 ) { # Generate all possible chains. # FIXME: This needs optimisation. for my $part1 (@path_parts) { for my $part2 (@path_parts) { next if $part1->group eq $part2->group; push @chains, ChemOnomatopist::Chain::FromHalves->new( $part1, $part2 ); } } } else { # methane @chains = @path_parts; } } die "cannot determine the parent structure\n" unless @chains; my $chain = filter_chains( @chains ); $chain->{is_main} = 1; my @vertices = $chain->vertices; # Replace the outdated chain with the selected one for my $group ($graph->groups) { next unless set( $group->vertices ) == set( $chain->vertices ); $graph->delete_group( $group ); } $graph->add_group( $chain ); ChemOnomatopist::Grammar::parse_graph( $graph, @ChemOnomatopist::Grammar::mainchain_rules ); $chain = first { $_->is_main } $graph->groups; # If there is at least one of carbon-based senior group attachment, # it means both ends are already senior, prompting to follow the # exception of three or more carbon-based groups. if( $most_senior_group && element( $groups[0] ) && element( $groups[0] ) eq 'C' && !$chain->isa( ChemOnomatopist::Chain::Circular:: ) && $chain->number_of_groups( $most_senior_group ) ) { shift @vertices; pop @vertices; $chain = ChemOnomatopist::Chain->new( $graph, undef, @vertices ); } print STDERR ">>> mainchain: $chain (length = " . $chain->length . ")\n" if $DEBUG; return $chain; } # Selects the best side chain sub select_sidechain { my( $graph, $parent, $start ) = @_; # Do this for non-carbons for now in order to represent attachments if( !is_element( $start, 'C' ) && $graph->degree( $start ) == 0 + defined $parent ) { return ChemOnomatopist::Chain->new( $graph, $parent, $start ); } my $C_graph = copy $graph; $C_graph->delete_edge( $start, $parent ) if $parent; if( $graph->degree( $start ) == 1 + defined $parent && grep { element( $start ) && element( $start ) eq $_ } qw( S Se Te ) ) { # Chalcogen analogues of ethers $C_graph->delete_vertices( grep { !is_element( $_, element( $start ) ) } $C_graph->vertices ); } elsif( $parent && grep { element( $start ) && element( $start ) eq $_ } qw( Si ) ) { # Elements which can start sidechains $C_graph->delete_vertices( grep { !is_element( $_, element( $start ) ) } $C_graph->vertices ); } else { # Delete non-carbon leaves $C_graph->delete_vertices( grep { $_ != $start && !is_element( $_, 'C' ) && $C_graph->degree( $_ ) == 1 } $C_graph->vertices ); } # Delete formed chains $C_graph->delete_vertices( grep { $_ != $start } map { $_->vertices } $C_graph->groups ); # FIXME: Ad-hoc, but works... $C_graph->delete_vertices( grep { $_ != $start && blessed $_ && $_->isa( ChemOnomatopist::Group::Amine:: ) } $C_graph->vertices ); return ChemOnomatopist::Chain->new( $graph, $parent, $start ) unless $C_graph->degree( $start ); my @path_parts; for my $neighbour ($C_graph->neighbours( $start )) { my $graph_copy = copy $C_graph; $graph_copy->delete_edge( $start, $neighbour ); for my $path ( graph_longest_paths_from_vertex( $graph_copy, $neighbour ) ) { push @path_parts, ChemOnomatopist::ChainHalf->new( $graph, undef, $start, @$path ); } } my @chains; if( $C_graph->degree( $start ) > 1 && (!blessed $start || !$start->is_terminal) ) { # FIXME: Deduplicate: copied from select_mainchain() # Generate all possible chains. # FIXME: This needs optimisation. for my $part1 (@path_parts) { for my $part2 (@path_parts) { next if $part1->group eq $part2->group; push @chains, ChemOnomatopist::Chain::FromHalves->new( $part1, $part2 ); } } } else { @chains = map { ChemOnomatopist::Chain->new( $graph, $parent, $_->vertices ) } @path_parts; } die "cannot select a sidechain\n" unless @chains; # From BBv2 P-29.2 my $rule_lowest_free_valence = sub { my( @chains ) = @_; my @chains_now; my $lowest_locant; for my $chain (@chains) { my @vertices = $chain->vertices; my( $locant ) = grep { $vertices[$_] == $start } 0..$#vertices; if( @chains_now ) { if( $lowest_locant > $locant ) { @chains_now = ( $chain ); $lowest_locant = $locant; } elsif( $lowest_locant == $locant ) { push @chains_now, $chain; } } else { @chains_now = ( $chain ); $lowest_locant = $locant; } } return @chains_now; }; for my $rule ( sub { return @_ }, \&rule_longest_chains, \&rule_greatest_number_of_side_chains, # After this rule we are left with a set of longest chains all having the same number of side chains $rule_lowest_free_valence, \&rule_most_multiple_bonds, \&rule_most_double_bonds, \&rule_lowest_numbered_multiple_bonds, \&rule_lowest_numbered_locants, \&rule_most_carbon_in_side_chains, \&rule_least_branched_side_chains, \&pick_chain_with_lowest_attachments_alphabetically, \&pick_alphabetically_earliest ) { my @chains_now = $rule->( @chains ); # CHECK: Can a rule cause disappearance of all chains? next unless @chains_now; @chains = @chains_now; # Narrow down the selection # If a single chain cannot be chosen now, pass on to the next rule next unless @chains == 1; return shift @chains; } die "cannot select a sidechain\n"; } # TODO: Should reflect the order described in BBv2 P-44 sub filter_chains { my( @chains ) = @_; for my $rule ( sub { @_ }, # P-44.1.1: Maximum number of substituents of principal characteristic group. # P-44.1.2: Senior atom. # This is not needed as select_mainchain() returns such chains. \&rule_lowest_numbered_charges, # There is no such rule, but this is required as charges are not treated as suffix groups \&rule_lowest_numbered_anions, # TODO: P-44.1.2: Concerns rings # P-44.2.1 (a): Ring system is a heterosystem \&rule_circular_is_heterocycle, # P-44.2.1 (b): Ring system has at least one nitrogen atom # CHECKME: Is not this the same as implemented just below? \&rule_circular_has_nitrogen, # P-44.2.1 (c): Ring system has the most senior heteroatom \&rule_circular_most_senior_heteroatom, # P-44.2.1 (d): Ring system has most rings \&rule_circular_most_rings, # P-44.3.1: Maximum number of heteroatoms of any kind \&rule_most_heteroatoms, # P-44.3.2: Maximum number of skeletal atoms \&rule_longest_chains, # P-44.3.3: Maximum number of the most senior skeletal heteroatom \&rule_greatest_number_of_most_senior_heteroatoms, # P-44.4.1.1: Greater number of multiple bonds \&rule_most_multiple_bonds, # P-44.4.1.2: Greater number of double bonds \&rule_most_double_bonds, # TODO: P-44.4.1.3: Nonstandard bonding numbers # P-44.4.1.3.1: Greater number of atoms with nonstandard bonding numbers \&rule_most_nonstandard_valence_positions, # P-44.4.1.3.2: Lowest locants for atoms with nonstandard bonding numbers ### \&rule_lowest_numbered_nonstandard_valence_positions, # This fails 2,5,8-trioxa-11λ4-thiadodecane # TODO: P-44.4.1.4: Concerns rings with indicated hydrogen \&rule_most_indicated_hydrogens, # There is no such rule, but before comparing lists they have to be of the same size? \&rule_lowest_numbered_indicated_hydrogens, # P-44.4.1.5: Lowest locants for heteroatoms in skeletal chain \&rule_lowest_numbered_heteroatoms, # P-44.4.1.6: Lowest locants for heteroatoms in skeletal chain according to heteroatom seniority \&rule_lowest_numbered_most_senior_heteroatoms, # TODO: P-44.4.1.7: Concerns fused rings # P-44.4.1.8: Lowest locants for suffix groups \&rule_lowest_numbered_senior_groups, # TODO: P-44.4.1.9: Concerns rings # TODO: P-44.4.1.10: Lowest locants for prefixes/suffixes expressing degrees of hydrogenation # This is not fully implemented now # P-44.4.1.10.1: Lowest locants for multiple bonds as a set, then to double bonds \&rule_lowest_numbered_multiple_bonds, \&rule_lowest_numbered_double_bonds, # TODO: P-44.4.1.10.2: Lower locants for hydro/dehydro prefixes # P-44.4.1.11: Concerns isotopes # P-44.4.1.11.1: Greater number of isotopically modified atoms or groups \&rule_greater_number_of_isotopically_modified_atoms_or_groups, # P-44.4.1.11.2: Greater number of nuclides of higher atomic number \&rule_greater_number_of_nuclides_of_higher_atomic_number, # P-44.4.1.11.3: Greater number of nuclides of higher mass number \&rule_greater_number_of_nuclides_of_higher_mass_number, # P-44.4.1.11.4: Lowest locants for isotopically modified atoms or groups \&rule_lowest_locants_for_isotopically_modified_atoms_or_groups, # P-44.4.1.11.5: Lowest locants for nuclides of higher atomic number \&rule_lowest_locants_for_nuclides_of_higher_atomic_number, # P-44.4.1.11.6: Lowest locants for nuclides of higher mass number \&rule_lowest_locants_for_nuclides_of_higher_mass_number, # TODO: P-44.4.1.12: Concerns stereogenic centers # TODO: P-45.1: Multiplication of identical senior parent structures # P-45.2.1: Maximum number of prefix substituents # FIXME: This includes suffix substituents now \&rule_greatest_number_of_side_chains, # P-45.2.2: Lowest locants for prefix substituents # FIXME: This includes suffix substituents now \&rule_lowest_numbered_locants, # TODO: Put these in correct order: \&rule_most_carbon_in_side_chains, \&rule_least_branched_side_chains, # P-45.2.3: Lowest locants for prefix substituents in their order of citation in the name \&pick_chain_with_lowest_attachments_alphabetically, # TODO: P-45.3: Nonstandard bond numbers # TODO: P-45.4: Concerns isotopes # P-45.4.2: Lowest locants for nuclides of higher atomic number \&rule_lowest_locants_for_nuclides_of_higher_atomic_number, # P-45.4.3: Lowest locants for nuclides of higher mass number \&rule_lowest_locants_for_nuclides_of_higher_mass_number, # P-45.5: Alphanumerical order of names # TODO: This is not implemented fully \&pick_alphabetically_earliest, # TODO: P-45.6: Concerns stereochemistry ) { my @chains_now = $rule->( @chains ); if( $DEBUG ) { require Sub::Identify; print STDERR '>>> ', Sub::Identify::sub_name( $rule ), "\n"; } # If a rule causes disappearance of all chains, it is a bad discriminator next unless @chains_now; @chains = @chains_now; # Narrow down the selection # If a single chain cannot be chosen now, pass on to the next rule return shift @chains if @chains == 1; } # TODO: Handle the case when none of the rules select proper chains return (); } sub rule_most_groups { my $class = shift; return all_max { $_->number_of_groups( $class ) } @_; } sub rule_circular_is_heterocycle { my @chains = @_; return @chains unless all { $_->isa( ChemOnomatopist::Chain::Circular:: ) } @chains; return grep { $_->is_heterocycle } @chains; } sub rule_circular_has_nitrogen { my @chains = @_; return @chains unless all { $_->isa( ChemOnomatopist::Chain::Circular:: ) } @chains; return grep { any { $_ eq 'N' } $_->heteroatoms } @chains; } sub rule_circular_most_senior_heteroatom { my @chains = @_; return @chains unless all { $_->isa( ChemOnomatopist::Chain::Circular:: ) } @chains; return ChemOnomatopist::Chain::Bicycle::rule_most_senior_heteroatom( @chains ); } sub rule_circular_most_rings { return @_ unless all { $_->isa( ChemOnomatopist::Chain::Circular:: ) } @_; return all_max { $_->number_of_rings } @_; } sub rule_lowest_numbered_senior_groups { all_min { ChemOnomatopist::Comparable::Array::Numeric->new( $_->most_senior_group_positions ) } @_ } sub rule_lowest_numbered_multiple_bonds { all_min { ChemOnomatopist::Comparable::Array::Numeric->new( $_->multiple_bond_positions ) } @_ } sub rule_lowest_numbered_double_bonds { all_min { ChemOnomatopist::Comparable::Array::Numeric->new( $_->double_bond_positions ) } @_ } # This rule is employed only if longest chains are not already preselected sub rule_longest_chains { all_max { $_->length } @_ } sub rule_greatest_number_of_side_chains { all_max { $_->number_of_branches } @_ } sub rule_lowest_numbered_locants { all_min { ChemOnomatopist::Comparable::Array::Numeric->new( $_->branch_positions ) } @_ } sub rule_most_carbon_in_side_chains { all_max { $_->number_of_carbons } @_ } sub rule_least_branched_side_chains { all_min { $_->number_of_branches_in_sidechains } @_ } sub rule_most_heteroatoms { all_max { $_->number_of_heteroatoms } @_ } sub rule_greatest_number_of_most_senior_heteroatoms { my( @chains ) = @_; my( $max_value ) = sort { cmp_heteroatom_counts( $a, $b ) } map { [ $_->heteroatoms ] } @chains; return grep { !cmp_heteroatom_counts( [ $_->heteroatoms ], $max_value ) } @chains; } sub rule_most_multiple_bonds { all_max { $_->number_of_multiple_bonds } @_ } sub rule_lowest_numbered_charges { all_min { ChemOnomatopist::Comparable::Array::Numeric->new( map { $_->index } $_->charges ) } @_ } sub rule_lowest_numbered_anions { all_min { ChemOnomatopist::Comparable::Array::Numeric->new( map { $_->index } grep { $_->charge < 0 } $_->charges ) } @_ } sub rule_most_double_bonds { all_max { $_->number_of_double_bonds } @_ } sub rule_greater_number_of_isotopically_modified_atoms_or_groups { all_max { $_->number_of_isotopes } @_ } sub rule_greater_number_of_nuclides_of_higher_atomic_number { all_max { ChemOnomatopist::Comparable::Array::Isotope::By::AtomicNumber->new( $_->isotopes ) } @_ } sub rule_greater_number_of_nuclides_of_higher_mass_number { all_max { ChemOnomatopist::Comparable::Array::Isotope::By::MassNumber->new( $_->isotopes ) } @_ } sub rule_lowest_locants_for_isotopically_modified_atoms_or_groups { all_min { ChemOnomatopist::Comparable::Array::Numeric->new( sort map { $_->locant } $_->isotopes ) } @_ } sub rule_lowest_locants_for_nuclides_of_higher_atomic_number { my( @chains ) = @_; my( $max_value ) = sort { cmp_arrays( $a, $b ) } map { [ map { $_->locant } sort { $b->atomic_number <=> $a->atomic_number } $_->isotopes ] } @chains; return grep { !cmp_arrays( [ map { $_->locant } sort { $b->atomic_number <=> $a->atomic_number } $_->isotopes ], $max_value ) } @chains; } sub rule_lowest_locants_for_nuclides_of_higher_mass_number { my( @chains ) = @_; my( $max_value ) = sort { cmp_arrays( $a, $b ) } map { [ map { $_->locant } sort { $b->mass_number <=> $a->mass_number } $_->isotopes ] } @chains; return grep { !cmp_arrays( [ map { $_->locant } sort { $b->mass_number <=> $a->mass_number } $_->isotopes ], $max_value ) } @chains; } sub rule_most_nonstandard_valence_positions { all_max { $_->number_of_nonstandard_valence_positions } @_ } sub rule_lowest_numbered_nonstandard_valence_positions { all_min { ChemOnomatopist::Comparable::Array::Numeric->new( $_->nonstandard_valence_positions ) } @_ } sub rule_most_indicated_hydrogens { return all_max { $_->number_of_indicated_hydrogens } grep { $_->isa( ChemOnomatopist::Chain::Circular:: ) && $_->needs_indicated_hydrogens } @_; } sub rule_lowest_numbered_indicated_hydrogens { return all_min { ChemOnomatopist::Comparable::Array::Numeric->new( $_->indicated_hydrogens ) } grep { $_->isa( ChemOnomatopist::Chain::Circular:: ) && $_->needs_indicated_hydrogens } @_; } sub rule_lowest_numbered_heteroatoms { all_min { ChemOnomatopist::Comparable::Array::Numeric->new( $_->heteroatom_positions ) } @_ } sub rule_lowest_numbered_most_senior_heteroatoms { my( @chains ) = @_; my( $max_value ) = sort { cmp_heteroatom_seniority( [ $a->heteroatoms ], [ $b->heteroatoms ] ) } @chains; return grep { !cmp_heteroatom_seniority( [ $_->heteroatoms ], [ $max_value->heteroatoms ] ) } @chains; } sub pick_chain_with_lowest_attachments_alphabetically { my( @chains ) = @_; my @chain_locants; for my $chain (@chains) { my @locant_names = $chain->locant_names; my @names; my @positions; for (0..$#locant_names) { next unless @{$locant_names[$_]}; # Skip empty positions push @names, @{$locant_names[$_]}; push @positions, ( $_ ) x @{$locant_names[$_]}; } # At this point parallel arrays @names contains names, and @positions contains positions. my @order = sort { cmp_only_alphabetical( $names[$a], $names[$b] ) || $names[$a] cmp $names[$b] } 0..$#names; push @chain_locants, [ map { $positions[$_] } @order ]; } my @sorted = sort { cmp_arrays( $chain_locants[$a], $chain_locants[$b] ) } 0..$#chain_locants; return map { $chains[$_] } grep { !cmp_arrays( $chain_locants[$sorted[0]], $chain_locants[$_] ) } 0..$#chain_locants; } sub pick_alphabetically_earliest { my( @chains ) = @_; my( $max_value ) = sort { cmp_only_alphabetical( [ $a->locant_names ], [ $b->locant_names ] ) } @chains; return $max_value; } sub most_senior_groups { my( @vertices ) = @_; my $graph; if( @vertices == 1 && blessed $vertices[0] && $vertices[0]->isa( Graph::Undirected:: ) ) { # Graph given instead of an array of vertices $graph = shift @vertices; @vertices = $graph->vertices; } my @groups = grep { blessed $_ && $_->isa( ChemOnomatopist::Group:: ) && !$_->is_prefix_only } @vertices; # TODO: For now grep is used, in future only ChemOnomatopist::Group subclasses should remain if( $graph && $graph->isa( ChemOnomatopist::MolecularGraph:: ) ) { push @groups, grep { $_->isa( ChemOnomatopist::Group:: ) } $graph->groups; } return unless @groups; my( $most_senior_group ) = sort { ChemOnomatopist::Group::cmp( $a, $b ) } @groups; return grep { !ChemOnomatopist::Group::cmp( $_, $most_senior_group ) } @groups; } # Given two lists of heteroatoms, return the one with the most senior ones sub cmp_heteroatom_counts { my( $A, $B ) = @_; my( %A, %B ); for (@$A) { $A{$_}++ } for (@$B) { $B{$_}++ } my @elements = sort { $elements{$a}->{seniority} <=> $elements{$b}->{seniority} } grep { $elements{$_}->{seniority} >= 5 } # O and after keys %elements; for (@elements) { $A{$_} = 0 unless $A{$_}; $B{$_} = 0 unless $B{$_}; return $B{$_} <=> $A{$_} if $B{$_} <=> $A{$_}; } return 0; } sub cmp_heteroatom_seniority { my( $A, $B ) = @_; for (0..$#$A) { next unless $elements{$A->[$_]}->{seniority} <=> $elements{$B->[$_]}->{seniority}; return $elements{$A->[$_]}->{seniority} <=> $elements{$B->[$_]}->{seniority}; } return 0; } sub cmp_locants($$) { my( $A, $B ) = @_; return $A <=> $B if $A =~ /^[0-9]+$/ && $B =~ /^[0-9]+$/; return $A cmp $B if $A !~ /^[0-9]+$/ && $B !~ /^[0-9]+$/; return ($A =~ /^[0-9]+$/) <=> ($B =~ /^[0-9]+$/); # Letters go first } # Sorts given names only based on alphabetical part of the name. # tert compounds are ordered according to BBv2 P-14.5 which says: # "[t]he preferred order for alphanumerical order is: nonitalic Roman letters > italic letters > Greek letters." sub cmp_only_alphabetical { my( $a, $b ) = @_; if( ref $a eq 'ARRAY' || ref $b eq 'ARRAY' ) { my @A = ref $a eq 'ARRAY' ? @$a : ( $a ); my @B = ref $b eq 'ARRAY' ? @$b : ( $b ); for (0..min( scalar( @A ), scalar( @B ) )-1) { my $cmp = cmp_only_alphabetical( $A[$_], $B[$_] ); return $cmp if $cmp; } return @A <=> @B; } # Letters in isotopes are not compared (see BBv3 P-45.5) $a = $a->remove_isotopes if blessed $a; $b = $b->remove_isotopes if blessed $b; # Dropping hydrogen indicators $a =~ s/^\d+H-//; $b =~ s/^\d+H-//; $a =~ s/[^a-zA-Z]+//g; $b =~ s/[^a-zA-Z]+//g; my $a_has_tert = $a =~ s/^tert(butyl)$/$1/; my $b_has_tert = $b =~ s/^tert(butyl)$/$1/; return $a_has_tert <=> $b_has_tert if $a_has_tert <=> $b_has_tert; return $a cmp $b; } # According to https://en.wikipedia.org/wiki/IUPAC_numerical_multiplier sub IUPAC_numerical_multiplier { my( $N, $is_middle ) = @_; my $ones = $N % 10; my $tens = int( $N / 10 ) % 10; my $hundreds = int( $N / 100 ) % 10; my $thousands = int( $N / 1000 ) % 10; my @prefix = ( '', 'hen', 'di', 'tri', 'tetra', 'penta', 'hexa', 'hepta', 'octa', 'nona' ); return 'heni' if $N == 1 && $is_middle; return 'mono' if $N == 1; return 'do' if $N == 2 && $is_middle; if( $N < 10 ) { my $value = $prefix[$ones]; $value =~ s/a$// unless $is_middle; return $value; } return 'dec' . ($is_middle ? 'a' : '') if $N == 10; return 'undec' . ($is_middle ? 'a' : '') if $N == 11; return IUPAC_numerical_multiplier( $ones, 1 ) . 'dec' . ($is_middle ? 'a' : '') if $N < 20; return 'icos' . ($is_middle ? 'a' : '') if $N == 20; return IUPAC_numerical_multiplier( $ones, 1 ) . 'cos' . ($is_middle ? 'a' : '') if $N < 30; if( $N < 100 ) { return ($ones == 1 ? $prefix[$ones] : IUPAC_numerical_multiplier( $ones, 1 )) . IUPAC_numerical_multiplier( $tens, 1 ) . ($tens == 3 ? 'a' : '') . 'cont' . ($is_middle ? 'a' : ''); } if( $N < 1000 ) { my $prefix = int( $tens . $ones ) == 1 ? $prefix[$ones] : IUPAC_numerical_multiplier( int( $tens . $ones ), 1 ); $prefix[1] = 'he'; return $prefix . $prefix[$hundreds] . 'ct' . ($is_middle ? 'a' : ''); } if( $N < 10000 ) { $prefix[0] = 'ki'; return IUPAC_numerical_multiplier( int( $hundreds . $tens . $ones ), 1 ) . $prefix[$thousands] . 'li'; } die "cannot generate IUPAC numerical multiplier for $N\n"; } sub IUPAC_complex_numerical_multiplier { my( $N ) = @_; my @multipliers = ( undef, '', 'bis', 'tris' ); return $multipliers[$N] if $N < @multipliers; return IUPAC_numerical_multiplier( $N, 1 ) . 'kis'; } sub alkane_chain_name($) { my( $N ) = @_; die "alkane chain of zero length detected\n" unless $N; my @names = qw( ? meth eth prop but ); return $names[$N] if $N < @names; return IUPAC_numerical_multiplier( $N ); } 1; ChemOnomatopist-0.10.0/lib/ChemOnomatopist/000077500000000000000000000000001463750375500205775ustar00rootroot00000000000000ChemOnomatopist-0.10.0/lib/ChemOnomatopist/Chain.pm000066400000000000000000000536321463750375500221700ustar00rootroot00000000000000package ChemOnomatopist::Chain; # ABSTRACT: Chain of atoms # VERSION use strict; use warnings; use ChemOnomatopist; use ChemOnomatopist::Chain::Amide; use ChemOnomatopist::Chain::Amine; use ChemOnomatopist::Chain::Ether; use ChemOnomatopist::Charge; use ChemOnomatopist::Elements qw( %elements ); use ChemOnomatopist::Group::Carboxyl; use ChemOnomatopist::Group::Ether; use ChemOnomatopist::Isotope; use ChemOnomatopist::Name::Part::AlkaneANSuffix; use ChemOnomatopist::Name::Part::Isotope; use ChemOnomatopist::Util::SMILES qw( path_SMILES ); use Graph::Traversal::DFS; use List::Util qw( all any first none sum0 uniq ); use Scalar::Util qw( blessed ); use Set::Object qw( set ); sub vertices(); sub new { my( $class, $graph, $parent, @vertices ) = @_; # TODO: For now only chains with a single oxygen atom are detected as ethers. # First of all, ChemOnomatopist::Chain::Ether is not very clever. # Second, BBv2 P-63.2.4.1 does not draw a clear line between ether naming and skeletal replacement nomenclature. my $self; if( (grep { blessed $_ && $_->isa( ChemOnomatopist::Group::Ether:: ) } @vertices) == 1 ) { $self = ChemOnomatopist::Chain::Ether->new( $graph, $parent, @vertices ); } elsif( blessed $vertices[0] && $vertices[0]->isa( ChemOnomatopist::Group::Amine:: ) && none { blessed $_ && $_->isa( ChemOnomatopist::Group::Amine:: ) } @vertices[1..$#vertices] ) { my $amine = shift @vertices; if( $graph->degree( $amine ) <= 2 ) { my $chain = bless { vertices => \@vertices, graph => $graph }, $class; $self = ChemOnomatopist::Chain::Amine->new( $graph, $chain, $amine ); } else { $self = bless { vertices => [ $amine ], graph => $graph }, $class; } } else { $self = { vertices => \@vertices, graph => $graph, cache => {} }; $self->{parent} = $parent if $parent; $self = bless $self, $class; } return $self; } # Accessors sub graph() { my( $self ) = @_; return $self->{graph}; } sub parent(;$) { my( $self, $parent ) = @_; my $old_parent = exists $self->{parent} ? $self->{parent} : undef; $self->{parent} = $parent if $parent; # TODO: Maybe invalidate the related cache return $old_parent; } sub parent_locant() { my( $self ) = @_; my $parent = $self->parent; return $parent unless $parent; my $graph = $self->graph; my @vertices = $self->vertices; return first { $graph->has_edge( $vertices[$_], $parent ) } 0..$#vertices; } sub substituents() { my( $self ) = @_; my $vertices = set( $self->vertices ); my @substituents; for my $vertex ($self->vertices) { for my $neighbour ($self->graph->neighbours( $vertex )) { next if $vertices->has( $neighbour ); push @substituents, $neighbour; } } return @substituents; } sub vertices() { my( $self ) = @_; return @{$self->{vertices}}; } # Properties sub backbone_SMILES() { my( $self ) = @_; return path_SMILES( $self->graph, $self->vertices ); } sub bonds() { my( $self ) = @_; return @{$self->{bonds}} if $self->{bonds}; my $graph = $self->graph; my @vertices = $self->vertices; my @bonds; for (0..$self->length-2) { if( $graph->has_edge_attribute( @vertices[$_ .. $_+1], 'bond' ) ) { push @bonds, $graph->get_edge_attribute( @vertices[$_ .. $_+1], 'bond' ); } else { push @bonds, '-'; } } $self->{bonds} = \@bonds; return @bonds; } sub branch_positions() { my( $self ) = @_; return @{$self->{branch_positions}} if $self->{branch_positions}; my $graph = $self->_disconnected_chain_graph; my @vertices = $self->vertices; my @branch_positions = map { ( $_ ) x $graph->degree( $vertices[$_] ) } grep { $graph->degree( $vertices[$_] ) } 0..$#vertices; $self->{branch_positions} = \@branch_positions; return @branch_positions; } sub group_positions { my( $self, $class ) = @_; return @{$self->{group_positions}{$class}} if $self->{group_positions}{$class}; my $graph = $self->_disconnected_chain_graph; my @vertices = $self->vertices; my @group_positions; for (0..$#vertices) { my $groups = grep { blessed $_ && $_->isa( $class ) } $graph->neighbours( $vertices[$_] ); next unless $groups; push @group_positions, ( $_ ) x $groups; } $self->{group_positions}{$class} = \@group_positions; return @group_positions; } sub heteroatom_positions() { my( $self ) = @_; return @{$self->{heteroatom_positions}} if $self->{heteroatom_positions}; my @vertices = $self->vertices; my @heteroatom_positions; for (0..$#vertices) { next if blessed $vertices[$_] && !$vertices[$_]->isa( ChemOnomatopist::Group::Ether:: ); next if ChemOnomatopist::is_element( $vertices[$_], 'C' ); push @heteroatom_positions, $_; } $self->{heteroatom_positions} = \@heteroatom_positions; return @heteroatom_positions; } # CHECKME: Can chains have indicated hydrogens? sub indicated_hydrogens() { my @hydrogen_positions; return @hydrogen_positions } sub nonstandard_valence_positions() { my( $self ) = @_; return @{$self->{nonstandard_valence_positions}} if $self->{nonstandard_valence_positions}; my @vertices = $self->vertices; my @nonstandard_valence_positions; for (0..$#vertices) { next if blessed $vertices[$_]; next if ChemOnomatopist::element( $vertices[$_] ) eq 'C'; next if ChemOnomatopist::element( $vertices[$_] ) eq 'N'; next unless exists $vertices[$_]->{valence}; push @nonstandard_valence_positions, $_; } $self->{nonstandard_valence_positions} = \@nonstandard_valence_positions; return @nonstandard_valence_positions; } sub is_main() { $_[0]->{is_main} } sub is_hydrocarbon() { my( $self ) = @_; return $self->number_of_heteroatoms == 0; } sub is_saturated() { my( $self ) = @_; return all { $_ eq '-' } $self->bonds; } sub is_substituted() { my( $self ) = @_; return $self->number_of_branches > 0; } # Returns maximum number of substitutable locations sub max_valence() { my( $self ) = @_; my $max_valence = 0; for my $vertex ($self->vertices) { my $element = ChemOnomatopist::element( $vertex ); next unless $element; next if !exists $elements{$element}; next if !exists $elements{$element}->{standard_bonding_number}; $max_valence += $elements{$element}->{standard_bonding_number}; } for my $bond ($self->bonds) { $max_valence -= 2 if $bond eq '-'; $max_valence -= 3 if $bond eq ':'; # Aromatic bond is 1.5 $max_valence -= 4 if $bond eq '='; $max_valence -= 6 if $bond eq '#'; $max_valence -= 8 if $bond eq '$'; } return $max_valence; } sub most_senior_groups() { my( $self ) = @_; return ChemOnomatopist::most_senior_groups( $self->vertices, $self->substituents ); } sub most_senior_group_positions() { my( $self ) = @_; return @{$self->{most_senior_group_positions}} if $self->{most_senior_group_positions}; my @groups = ChemOnomatopist::most_senior_groups( $self->vertices, $self->substituents ); $self->{most_senior_group_positions} = []; return () unless @groups; my $groups = set( @groups ); my @vertices = $self->vertices; my $vertices = set( @vertices ); my @positions; for (0..$#vertices) { my $vertex = $vertices[$_]; push @positions, $_ if $groups->has( $vertex ); for my $neighbour ($self->{graph}->neighbours( $vertex )) { next if $vertices->has( $neighbour ); push @positions, $_ if $groups->has( $neighbour ); } } $self->{most_senior_group_positions} = \@positions; return @positions; } sub multiple_bond_positions() { my( $self ) = @_; my @bonds = $self->bonds; return grep { $bonds[$_] =~ /^[=#\$]$/ } 0..$#bonds; } sub double_bond_positions() { my( $self ) = @_; my @bonds = $self->bonds; return grep { $bonds[$_] eq '=' } 0..$#bonds; } # FIXME: Needs proper name sub needs_ane_suffix() { my( $self ) = @_; return $self->length == 1 && blessed $self->{vertices}[0] && ($self->{vertices}[0]->isa( ChemOnomatopist::Group::Sulfinyl:: ) || $self->{vertices}[0]->isa( ChemOnomatopist::Group::Sulfonyl:: )); } sub needs_multiple_bond_locants() { my( $self ) = @_; return $self->length > 2; } sub needs_multiple_bond_suffix() { my( $self ) = @_; my( $first ) = $self->vertices; return 1 unless blessed $first; return $first->needs_multiple_bond_suffix; } sub needs_heteroatom_locants() { my( $self ) = @_; return '' if $self->length == 1; my @vertices = $self->vertices; # Check if this is -oxy substituent if( $self->parent && !$self->number_of_branches && $self->number_of_heteroatoms == 1 && $vertices[0]->{symbol} eq 'O' ) { return ''; } if( scalar( uniq $self->heteroatoms ) == 1 ) { return $self->number_of_heteroatoms != $self->length; } elsif( scalar( uniq $self->heteroatoms ) > 1 ) { return 1; } } sub needs_heteroatom_names() { my( $self ) = @_; my @vertices = $self->vertices; # Check if this is -oxy substituent if( $self->parent && !$self->number_of_branches && $self->number_of_heteroatoms == 1 && $vertices[0]->{symbol} eq 'O' ) { return ''; } # Chalcogen analogues of ethers if( @vertices == 1 && $self->parent && grep { ChemOnomatopist::element( @vertices ) eq $_ } qw( S Se Te ) ) { return ''; } return 1; } sub needs_suffix_locant() { my( $self ) = @_; # BBv2 P-14.3.4.2 (a): mononuclear parent hydrides do not need locants return '' if $self->length == 1; return 1 if $self->isotopes; # BBv2 P-14.3.4.2 (b): monosubstituted homogeneous chains consisting of only two identical atoms do not need locants return '' if $self->length == 2 && $self->number_of_branches == 1; my @most_senior_groups = $self->most_senior_groups; return '' unless @most_senior_groups; return 1 if $self->number_of_heteroatoms; # P-15.4.3.2.3: Characteristic groups cited as suffixes are given locants return 1 if @most_senior_groups > 2; return 1 if !ChemOnomatopist::element( $most_senior_groups[0] ); return 1 if ChemOnomatopist::element( $most_senior_groups[0] ) ne 'C'; return ''; } sub needs_substituent_locants() { my( $self ) = @_; return '' if $self->length == 1; return 1 if $self->number_of_isotopes; # FIXME: Make sure the substituents are of the same kind return '' if scalar( $self->substituents ) == $self->max_valence; # Ad-hoc fix for acetic acid substituents if( $self->length == 2 && any { blessed $_ && $_->isa( ChemOnomatopist::Group::Carboxyl:: ) } $self->vertices ) { return ''; } return 1; } sub needs_charge_locants() { &needs_substituent_locants } sub needs_isotope_locants() { &needs_substituent_locants } sub charges() { my( $self ) = @_; return @{$self->{charges}} if $self->{charges}; my @vertices = $self->vertices; my @charges; for my $i (0..$#vertices) { next if blessed $vertices[$i]; next unless $vertices[$i]->{charge}; push @charges, ChemOnomatopist::Charge->new( $vertices[$i]->{charge}, $i, $self->locants( $i ) ); } $self->{charges} = \@charges; return @charges; } sub heteroatoms() { my( $self ) = @_; my @vertices = $self->vertices; return map { ChemOnomatopist::element( $vertices[$_] ) } $self->heteroatom_positions; } sub nonstandard_valences() { my( $self ) = @_; my @vertices = $self->vertices; return map { $vertices[$_]->{valence} } $self->nonstandard_valence_positions; } sub isotopes() { my( $self ) = @_; return @{$self->{isotopes}} if $self->{isotopes}; my @vertices = $self->vertices; my @isotopes; for my $i (0..$#vertices) { next if blessed $vertices[$i]; if( exists $vertices[$i]->{isotope} ) { push @isotopes, ChemOnomatopist::Isotope->new( ChemOnomatopist::element( $vertices[$i] ), $vertices[$i]->{isotope}, $i, $self->locants( $i ) ); } if( exists $vertices[$i]->{h_isotope} ) { for (@{$vertices[$i]->{h_isotope}}) { next unless defined $_; push @isotopes, ChemOnomatopist::Isotope->new( 'H', $_, $i, $self->locants( $i ) ); } } } $self->{isotopes} = \@isotopes; return @isotopes; } sub length() { my( $self ) = @_; return scalar $self->vertices; } sub locant_names() { my( $self ) = @_; return @{$self->{locant_names}} if $self->{locant_names}; my $graph = $self->_disconnected_chain_graph->copy; my @locants; for my $vertex ($self->vertices) { my @current_locants; for my $neighbour ($graph->neighbours( $vertex )) { $graph->delete_edge( $vertex, $neighbour ); next if $self->parent && $self->parent == $neighbour; if( blessed $neighbour ) { push @current_locants, $neighbour->prefix; } else { push @current_locants, ChemOnomatopist::get_sidechain_name( $graph, $vertex, $neighbour ); } } push @locants, \@current_locants; } $self->{locant_names} = \@locants; return @locants; } sub locants(@) { my $self = shift; return map { $_ + 1 } @_; } sub bond_locants(@) { my $self = shift; return map { $_ + 1 } @_; } sub number_of_branches_in_sidechains() { my( $self ) = @_; return $self->{number_of_branches_in_sidechains} if exists $self->{number_of_branches_in_sidechains}; my $graph = $self->_disconnected_chain_graph->copy; my @vertex_neighbours = map { $graph->neighbours( $_ ) } $self->vertices; $graph->delete_vertices( $self->vertices ); # has_vertex() is used to filter out neighbours within the chain my $number = sum0 map { $_ > 2 ? $_ - 2 : 0 } map { $graph->degree( $_ ) } map { Graph::Traversal::DFS->new( $graph, start => $_ )->dfs } grep { $graph->has_vertex( $_ ) } @vertex_neighbours; $self->{number_of_branches_in_sidechains} = $number; return $number; } sub number_of_carbons() { my( $self ) = @_; return $self->{number_of_carbons} if exists $self->{number_of_carbons}; my $graph = $self->_disconnected_chain_graph; my $C = grep { ChemOnomatopist::is_element( $_, 'C' ) } map { Graph::Traversal::DFS->new( $graph, start => $_ )->dfs } $self->vertices; # Since main chain carbons are included in the count, they have to be subtracted. $C -= $self->length; $self->{number_of_carbons} = $C; return $C; } sub number_of_charges() { my( $self ) = @_; return scalar $self->charges; } sub number_of_branches() { my( $self ) = @_; return scalar $self->branch_positions; } sub number_of_double_bonds() { my( $self ) = @_; return scalar grep { $_ eq '=' } $self->bonds; } sub number_of_groups { my( $self, $class ) = @_; return scalar $self->group_positions( $class ); } sub number_of_heteroatoms() { my( $self ) = @_; return scalar $self->heteroatom_positions; } sub number_of_indicated_hydrogens() { my( $self ) = @_; return scalar $self->indicated_hydrogens; } sub number_of_isotopes() { my( $self ) = @_; return scalar $self->isotopes; } sub number_of_multiple_bonds() { my( $self ) = @_; return scalar grep { $_ =~ /^[=#\$]$/ } $self->bonds; } sub number_of_nonstandard_valence_positions() { my( $self ) = @_; return scalar $self->nonstandard_valence_positions; } sub charge_part() { my( $self ) = @_; return ChemOnomatopist::Name->new unless $self->charges; my @negative = grep { $_->charge < 0 } $self->charges; my @positive = grep { $_->charge > 0 } $self->charges; my $name = ChemOnomatopist::Name->new; if( @positive ) { @positive = map { ( $_->locant ) x abs $_->charge } @positive; $name->append_locants( @positive ) if $self->needs_charge_locants; if( @positive > 1 ) { $name->append_multiplier( ChemOnomatopist::IUPAC_complex_numerical_multiplier( scalar @positive ) ); $name .= '('; } $name .= @negative ? 'ium' : 'ylium'; $name .= ')' if @positive > 1; } if( @negative ) { @negative = map { ( $_->locant ) x abs $_->charge } @negative; $name->append_locants( @negative ) if $self->needs_charge_locants; if( @negative > 1 ) { $name->append_multiplier( ChemOnomatopist::IUPAC_numerical_multiplier( scalar @negative ) ); } $name .= 'ide'; } return $name; } sub indicated_hydrogens_part() { ChemOnomatopist::Name->new } sub isotope_part() { my( $self ) = @_; my @isotopes = sort { $a->element cmp $b->element || $a->mass_number <=> $b->mass_number } $self->isotopes; return '' unless @isotopes; my @order; my %freq; for my $isotope (@isotopes) { my $key = $isotope->mass_number . $isotope->element; if( !$freq{$key} ) { $freq{$key} = []; push @order, $key; } push @{$freq{$key}}, $isotope; } my @vertices = $self->vertices; my $isotopes = ''; for my $key (@order) { $isotopes .= ',' if $isotopes; $isotopes .= join ',', map { $_->locant } @{$freq{$key}} if $self->needs_isotope_locants; $isotopes .= '-' if $self->needs_isotope_locants; $isotopes .= $key; if( @{$freq{$key}} > 1 || ( $key =~ /H$/ && $vertices[$freq{$key}->[0]{index}]->{hcount} > 1 ) ) { $isotopes .= scalar @{$freq{$key}}; } } return ChemOnomatopist::Name::Part::Isotope->new( "($isotopes)" ); } sub prefix() { my( $self ) = @_; if( $self->length == 1 ) { my $vertex = $self->{vertices}[0]; return $vertex->prefix if blessed $vertex; # Chalcogen analogues of ethers my $element = ChemOnomatopist::element( $vertex ); return ChemOnomatopist::Name->new( 'sulfan' ) if $element eq 'S'; return ChemOnomatopist::Name->new( 'selan' ) if $element eq 'Se'; return ChemOnomatopist::Name->new( 'tellan' ) if $element eq 'Te'; } my $name = $self->suffix; $name->pop_e; pop @$name if $name->ends_with_alkane_an_suffix; return $name . 'yl'; } sub suffix() { my( $self ) = @_; my @chain = $self->vertices; my $name = ChemOnomatopist::Name->new; if( $self->length == 1 && !blessed $chain[0] && ChemOnomatopist::element( @chain ) ne 'C' ) { return $name . 'ne'; # Leaving element prefix appending to the caller } # CHECKME: Not sure if calling prefix() is correct return $chain[0]->prefix if $self->length == 1 && blessed $chain[0]; my @bonds = $self->bonds; my @double = grep { $bonds[$_] eq '=' } 0..$#bonds; my @triple = grep { $bonds[$_] eq '#' } 0..$#bonds; # BBv2 P-63.2.2.2 if( $self->parent && @chain && (all { !blessed $_ } @chain) && ChemOnomatopist::element( @chain ) eq 'O' && !@double && !@triple && all { ChemOnomatopist::element( $_ ) eq 'C' } @chain[1..$#chain] ) { $name->append_stem( ChemOnomatopist::alkane_chain_name( $self->length - 1 ) ); $name .= 'oxy'; return $name; } if( $self->isa( ChemOnomatopist::Chain::Amide:: ) || $self->isa( ChemOnomatopist::Chain::Amine:: ) ) { $name->append_stem( ChemOnomatopist::alkane_chain_name( scalar grep { !blessed $_ } $self->vertices ) ); } elsif( (any { ChemOnomatopist::is_element( $_, 'C' ) } @chain) || scalar( uniq map { ChemOnomatopist::element( $_ ) } @chain ) > 1 ) { $name->append_stem( ChemOnomatopist::alkane_chain_name( $self->length ) ); } if( @double ) { $name .= 'a' if @double >= 2; # BBv2 P-16.8.2 if( $self->needs_multiple_bond_locants ) { $name->append_locants( $self->bond_locants( @double ) ); } if( @double > 1 ) { my $multiplier = ChemOnomatopist::IUPAC_numerical_multiplier( scalar @double ); $multiplier .= 'a' unless $multiplier =~ /i$/; # BBv2 P-31.1.1.2 $name->append_multiplier( $multiplier ); } $name .= 'en'; } if( @triple ) { $name .= 'a' if @triple >= 2 && !@double; # BBv2 P-16.8.2 if( $self->needs_multiple_bond_locants ) { $name->append_locants( $self->bond_locants( @triple ) ); } if( @triple > 1 ) { my $multiplier = ChemOnomatopist::IUPAC_numerical_multiplier( scalar @triple ); $multiplier .= 'a' unless $multiplier =~ /i$/; # BBv2 P-31.1.1.2 $name->append_multiplier( $multiplier ); } $name .= 'yn'; } $name .= ChemOnomatopist::Name::Part::AlkaneANSuffix->new( 'an' ) unless @double || @triple; $name .= 'e'; return $name; } sub vertex_ids { my $self = shift; my %ids = map { $self->{vertices}[$_] => $_ } 0..$self->length-1; return map { exists $ids{$_} ? $ids{$_} : undef } @_; } sub _cmp_instances { my( $A, $B ) = @_; # For now, just compare the sizes of cycles. # TODO: Proper ordering should be implemented as per BBv2 P-25.8.1 return $B->length <=> $A->length; } sub _disconnected_chain_graph() { my( $self ) = @_; my $graph = $self->graph->copy; my $vertices = set( $self->vertices ); $graph->delete_edges( map { @$_ } grep { $vertices->has( @$_ ) } $graph->edges ); return $graph; } 1; ChemOnomatopist-0.10.0/lib/ChemOnomatopist/Chain/000077500000000000000000000000001463750375500216215ustar00rootroot00000000000000ChemOnomatopist-0.10.0/lib/ChemOnomatopist/Chain/ABA.pm000066400000000000000000000043421463750375500225450ustar00rootroot00000000000000package ChemOnomatopist::Chain::ABA; # ABSTRACT: a(ba)n chain as per BBv3 P-21.2.3.1 # VERSION use strict; use warnings; use parent ChemOnomatopist::Chain::; use ChemOnomatopist::Name::Part::Element; use ChemOnomatopist::Elements qw( %elements ); use Scalar::Util qw( blessed ); sub new { my( $class, $graph, @vertices ) = @_; return bless { graph => $graph, vertices => \@vertices }, $class; } sub add { my( $self, @atoms ) = @_; die "cannot extend ABA chain into more than two sides\n" if @atoms > 2; my @chains = map { blessed $_ ? [ $_->vertices ] : [ $_ ] } @atoms; my $graph = $self->graph; my @vertices = $self->vertices; for (@chains) { next unless @$_; if( $graph->has_edge( $self->{vertices}[ 0], $_->[ 0] ) ) { unshift @vertices, reverse @$_; } elsif( $graph->has_edge( $self->{vertices}[ 0], $_->[-1] ) ) { unshift @vertices, @$_; } elsif( $graph->has_edge( $self->{vertices}[-1], $_->[ 0] ) ) { push @vertices, @$_; } elsif( $graph->has_edge( $self->{vertices}[-1], $_->[-1] ) ) { push @vertices, reverse @$_; } } $self->{vertices} = \@vertices; } sub inner_element { ChemOnomatopist::element( $_[0]->{vertices}[1] ) } sub outer_element { ChemOnomatopist::element( $_[0]->{vertices}[0] ) } sub heteroatom_positions() { my( $self ) = @_; return @{$self->{heteroatom_positions}} if $self->{heteroatom_positions}; my @vertices = $self->vertices; my @heteroatom_positions; for (0..$#vertices) { next if ChemOnomatopist::element( $vertices[$_] ) eq 'C'; next if ChemOnomatopist::element( $vertices[$_] ) eq $self->inner_element; push @heteroatom_positions, $_; } $self->{heteroatom_positions} = \@heteroatom_positions; return @heteroatom_positions; } sub needs_heteroatom_locants() { '' } sub needs_substituent_locants() { my( $self ) = @_; return '' if $self->inner_element eq 'O' && $self->length == 3 && $self->number_of_branches == 1; return $self->SUPER::needs_substituent_locants; } sub prefix() { ChemOnomatopist::Name::Part::Element->new( $elements{$_[0]->inner_element}->{prefix} . 'ne' )->to_name } sub suffix() { &prefix } 1; ChemOnomatopist-0.10.0/lib/ChemOnomatopist/Chain/Aceylene.pm000066400000000000000000000133771463750375500237170ustar00rootroot00000000000000package ChemOnomatopist::Chain::Aceylene; # ABSTRACT: Ace...ylene chain, as per BBv3 P-25.1.2.7 # VERSION use strict; use warnings; use parent ChemOnomatopist::Chain::Circular::; use ChemOnomatopist::Chain::Phenanthrene; use ChemOnomatopist::Chain::Polyacene; use ChemOnomatopist::Util::Graph qw( graph_without_edge_attributes subgraph ); use Graph::Nauty qw( are_isomorphic ); use Graph::Traversal::DFS; use Graph::Undirected; use List::Util qw( any first ); use Scalar::Util qw( blessed ); sub new { my( $class, $graph, @cycles ) = @_; my $pentane = first { $_->length == 5 } @cycles; my $subgraph = $graph->subgraph( map { $_->vertices } @cycles ); my %cycles_per_atom; for (@cycles) { for ($_->vertices) { $cycles_per_atom{$_}++; } } my $center = first { $cycles_per_atom{$_} == 3 } $subgraph->vertices; my @vertices; if( @cycles == 3 ) { my $first = first { $cycles_per_atom{$_} == 1 } $pentane->vertices; my $last = first { $cycles_per_atom{$_} == 2 } $subgraph->neighbours( $first ); $subgraph->delete_edge( $first, $last ); $subgraph->delete_vertex( $center ); @vertices = reverse Graph::Traversal::DFS->new( $subgraph, start => $first )->dfs; @vertices = ( @vertices[0..1], $center, @vertices[2..$#vertices] ); } else { my $d3_subgraph = $subgraph->subgraph( grep { $subgraph->degree( $_ ) == 3 } $subgraph->vertices ); my $last = first { $d3_subgraph->degree( $_ ) == 2 } $subgraph->neighbours( $center ); my $first = first { $cycles_per_atom{$_} == 1 } $subgraph->neighbours( $last ); $subgraph->delete_edge( $first, $last ); $subgraph->delete_vertex( $center ); $subgraph->delete_edges( map { @$_ } grep { $subgraph->degree( $_->[0] ) == 3 && $subgraph->degree( $_->[1] ) == 3 } $subgraph->edges ); @vertices = reverse Graph::Traversal::DFS->new( $subgraph, start => $first )->dfs; # Restore the original subgraph $subgraph = $graph->subgraph( map { $_->vertices } @cycles ); my $first_d3 = first { $subgraph->degree( $vertices[$_] ) == 3 } 0..$#vertices; @vertices = ( @vertices[0..$first_d3], $center, @vertices[$first_d3+1..$#vertices] ); } return bless { graph => $graph, center => $center, vertices => \@vertices }, $class; } sub is_acenaphthylene() { $_[0]->length == 12 } sub is_aceanthrylene() { my( $self ) = @_; return $self->length == 16 && $self->graph->has_edge( $self->{center}, $self->{vertices}[2] ); } sub is_acephenanthrylene() { my( $self ) = @_; return $self->length == 16 && $self->graph->has_edge( $self->{center}, $self->{vertices}[3] ); } sub has_form($$) { my( $class, $graph ) = @_; return '' unless $graph->vertices == 12 || $graph->vertices == 16; return '' if any { blessed $_ || ChemOnomatopist::element( $_ ) ne 'C' } $graph->vertices; return 1 if are_isomorphic( graph_without_edge_attributes( $graph ), $class->ideal_graph_acenaphthylene, sub { ChemOnomatopist::element( $_[0] ) } ); return 1 if are_isomorphic( graph_without_edge_attributes( $graph ), $class->ideal_graph_aceanthrylene, sub { ChemOnomatopist::element( $_[0] ) } ); return 1 if are_isomorphic( graph_without_edge_attributes( $graph ), $class->ideal_graph_acephenanthrylene, sub { ChemOnomatopist::element( $_[0] ) } ); return ''; } sub ideal_graph_acenaphthylene() { my( $class ) = @_; my $graph = Graph::Undirected->new( refvertexed => 1 ); my @vertices = map { { symbol => 'C' } } 1..11; $graph->add_cycle( @vertices ); my $center = { symbol => 'C' }; $graph->add_path( $vertices[2], $center, $vertices[6] ); $graph->add_path( $vertices[2], $center, $vertices[10] ); return $graph; } sub ideal_graph_aceanthrylene() { my( $class ) = @_; my $graph = ChemOnomatopist::Chain::Polyacene->ideal_graph( 14 ); my $d3 = first { $graph->degree( $_ ) == 3 } $graph->vertices; my @d2 = grep { $graph->degree( $_ ) == 2 } $graph->neighbours( $d3 ); $graph->add_path( $d2[0], { symbol => 'C' }, { symbol => 'C' }, $d2[1] ); return $graph; } sub ideal_graph_acephenanthrylene() { my( $class ) = @_; my $graph = ChemOnomatopist::Chain::Phenanthrene->ideal_graph; my $d3_subgraph = subgraph( $graph, grep { $graph->degree( $_ ) == 3 } $graph->vertices ); my $d3 = first { $d3_subgraph->degree( $_ ) == 1 } $d3_subgraph->vertices; my @d2 = grep { $graph->degree( $_ ) == 2 } $graph->neighbours( $d3 ); $graph->add_path( $d2[0], { symbol => 'C' }, { symbol => 'C' }, $d2[1] ); return $graph; } sub locants(@) { my $self = shift; my @vertices = $self->vertices; my $graph = $self->graph->subgraph( @vertices ); my %locant_map; my $pos = 0; for my $i (0..$#vertices) { if( $vertices[$i] == $self->{center} ) { $locant_map{$i} = $pos . 'a1'; } elsif( $graph->degree( $vertices[$i] ) == 2 ) { $pos++; $locant_map{$i} = $pos; } else { $locant_map{$i} = $pos . 'a'; } } return map { $locant_map{$_} } @_; } sub number_of_rings() { $_[0]->is_acenaphthylene ? 3 : 4 } sub prefix() { &suffix } sub suffix() { my( $self ) = @_; return 'acenaphthylene' if $self->is_acenaphthylene; return 'aceanthrylene' if $self->is_aceanthrylene; return 'acephenanthrylene' if $self->is_acephenanthrylene; die "unknown ace...ylene\n"; } 1; ChemOnomatopist-0.10.0/lib/ChemOnomatopist/Chain/Amide.pm000066400000000000000000000014401463750375500231750ustar00rootroot00000000000000package ChemOnomatopist::Chain::Amide; # ABSTRACT: Amide chain # VERSION use strict; use warnings; use ChemOnomatopist; use ChemOnomatopist::Name; use List::Util qw( first ); use Scalar::Util qw( blessed ); sub AUTOLOAD { our $AUTOLOAD; my $call = $AUTOLOAD; $call =~ s/.*:://; return if $call eq 'DESTROY'; my $self = shift; return $self->{chain}->can( $call )->( $self->{chain}, @_ ); } sub new { my( $class, $graph, $chain, $amide ) = @_; return bless { graph => $graph, chain => $chain, amide => $amide }; } sub vertices() { my $self = shift; my @vertices = ( $self->{amide}, $self->{chain}->vertices ); return @vertices; } sub locants(@) { my $self = shift; return map { $_ ? $_ : 'N' } @_; } sub needs_substituent_locants() { 1 } 1; ChemOnomatopist-0.10.0/lib/ChemOnomatopist/Chain/Amine.pm000066400000000000000000000036531463750375500232170ustar00rootroot00000000000000package ChemOnomatopist::Chain::Amine; # ABSTRACT: Amine chain # VERSION use strict; use warnings; use ChemOnomatopist; use ChemOnomatopist::Name; use List::Util qw( first ); use Scalar::Util qw( blessed ); sub AUTOLOAD { our $AUTOLOAD; my $call = $AUTOLOAD; $call =~ s/.*:://; return if $call eq 'DESTROY'; my $self = shift; return $self->{chain}->can( $call )->( $self->{chain}, @_ ); } sub new { my( $class, $graph, $chain, $amine ) = @_; return bless { graph => $graph, chain => $chain, amine => $amine }, $class; } sub isa { my( $self, $class ) = @_; return 1 if $class eq ChemOnomatopist::Chain::Amine::; return $self->{chain}->isa( $class ); } sub vertices() { my $self = shift; my @vertices = ( $self->{amine}, $self->{chain}->vertices ); return @vertices; } sub locants(@) { my $self = shift; return map { $_ ? $self->{chain}->locants( $_ - 1 ) : 'N' } @_; } # Need to adjust positions by 1 to accommodate the amino group as the first vertex sub heteroatom_positions { my( $self ) = @_; return map { $_ + 1 } $self->{chain}->heteroatom_positions; } sub is_hydrocarbon() { '' } sub is_main() { $_[0]->{is_main} } sub needs_substituent_locants() { $_[0]->{chain}->length > 0 } sub prefix() { my( $self ) = @_; return $self->{amine}->prefix unless $self->length; return $self->{chain}->prefix->append_stem( 'amino' ); } sub suffix() { my( $self ) = @_; my $suffix = $self->{chain}->suffix; return $suffix unless $self->{chain}->needs_suffix_locant; return $suffix if $self->{chain}->length == 2; # Ad-hoc fix for ethanamines my $neighbour = first { $self->graph->has_edge( $self->{amine}, $_ ) } $self->{chain}->vertices; die "cannot perceive connectivity in an amino chain\n" unless defined $neighbour; return $suffix->append_locants( $self->{chain}->locants( $self->vertex_ids( $neighbour ) ) ); } 1; ChemOnomatopist-0.10.0/lib/ChemOnomatopist/Chain/Bicycle.pm000066400000000000000000000505001463750375500235310ustar00rootroot00000000000000package ChemOnomatopist::Chain::Bicycle; # ABSTRACT: Fused bicyclic chain # VERSION use strict; use warnings; use ChemOnomatopist; use ChemOnomatopist::Chain::Circular; use ChemOnomatopist::Chain::Bicycle::Purine; use ChemOnomatopist::Chain::Monocycle; use ChemOnomatopist::Chain::Monocycle::Fused; use ChemOnomatopist::Comparable::Array::Numeric; use ChemOnomatopist::Elements qw( %elements ); use ChemOnomatopist::Name; use ChemOnomatopist::Name::Part::Fusion; use ChemOnomatopist::Name::Part::Stem; use ChemOnomatopist::Util qw( all_max all_min ); use ChemOnomatopist::Util::SMILES qw( cycle_SMILES ); use Chemistry::OpenSMILES qw( is_double_bond ); use Graph::Traversal::DFS; use List::Util qw( all any first min uniq ); use Scalar::Util qw( blessed ); use Set::Object qw( set ); use parent ChemOnomatopist::Chain::Circular::; # From BBv2 P-25.2.1 our @names = ( [ qw( NCNCCC NCCNCC pteridine ) ], [ qw( CNCNCC NCCNCC pteridine ) ], [ qw( NNCCCC CCCCCC cinnoline ) ], [ qw( NCNCCC CCCCCC quinazoline ) ], [ qw( NCCNCC CCCCCC quinoxaline ) ], [ 'c:c:c:n:c:c:', 'n:c:c:c:c:c:', '1,5-naphthyridine' ], # TODO: There are isomers [ 'c:n:n:c:c:c:', 'c:c:c:c:c:c:', 'phthalazine' ], [ 'n:c:c:c:c:c:', 'c:c:c:c:c:c:', 'quinoline' ], [ 'C=NC=CCC', 'c:c:c:c:c:c:', 'isoquinoline' ], [ 'c:n:c:c:c:c:', 'c:c:c:c:c:c:', 'isoquinoline' ], [ 'CC=CCn:c=', 'c:c:c:c:c:n:', 'quinolizine' ], [ 'CC=CC=c:n', 'c:c:c:c:n:c:', 'quinolizine' ], [ 'n:n:c:c:c:', 'c:c:c:c:c:c:', 'indazole' ], [ 'n:c:c:c:c:', 'c:c:c:c:c:c:', 'indole' ], [ 'c:n:c:c:c:', 'c:c:c:c:c:c:', 'isoindole' ], [ 'c:c:c:c:c:n:', 'c:c:c:n:c:', 'indolizine', ], [ qw( CCCCN CCCNC pyrrolizine ) ], [ qw( CCCNC CCCNC pyrrolizine ) ], ); for my $name (qw( indole indolizine isoindole isoquinoline quinoline quinolizine )) { for (grep { $_->[2] eq $name } @names) { my @As_parts = @$_; $As_parts[0] =~ s/n/\[as\]/g; $As_parts[1] =~ s/n/\[as\]/g; $As_parts[2] =~ s/^qu//; $As_parts[2] = 'ars' . $As_parts[2] unless $As_parts[2] =~ s/^iso(qu)?/isoars/; push @names, \@As_parts; my @P_parts = @$_; $P_parts[0] =~ s/n/p/g; $P_parts[1] =~ s/n/p/g; $P_parts[2] =~ s/^qu//; $P_parts[2] = 'phosph' . $P_parts[2] unless $P_parts[2] =~ s/^iso(qu)?/isophosph/; push @names, \@P_parts; } } for (@names) { $_->[0] =~ s/[=:]//g; $_->[1] =~ s/[=:]//g; ( $_->[0], $_->[1] ) = map { uc } @$_[0..1]; } # From BBv2 P-25.1.1, order of decreasing seniority our %hydrocarbons_by_size = ( '5,7' => 'azulene', '6,6' => 'naphthalene', '5,6' => 'indene', ); sub new { my( $class, $graph, @vertices ) = @_; # Has to be created early to be given for fused parts my $self = bless { graph => $graph, vertices => \@vertices }, $class; my $subgraph = $graph->subgraph( \@vertices ); my @bridge = grep { $subgraph->degree( $_ ) == 3 } @vertices; $subgraph->delete_edge( @bridge ); @vertices = Graph::Traversal::DFS->new( $subgraph, start => $bridge[0] )->dfs; $subgraph->delete_vertices( @bridge ); # Graph is broken into components. # Each component is represented as an array of vertices in the order of traverse. my @components = sort { @$a <=> @$b } $subgraph->connected_components; for (0..1) { if( @{$components[$_]} < 3 ) { die "bicycles with three and four-membered cycles are not supported yet\n" } my $subgraph = $graph->subgraph( [ @{$components[$_]}, @bridge ] ); $subgraph->delete_edge( @bridge ); my @path = Graph::Traversal::DFS->new( $subgraph, start => $bridge[$_] )->dfs; push @path, shift @path; $components[$_] = \@path; } my @cycles = map { ChemOnomatopist::Chain::Monocycle::Fused->new( $graph, $self, @$_ ) } @components; $self->{cycles} = \@cycles; my $nbenzene = scalar grep { $_->is_benzene } @cycles; if( !$nbenzene && $self->is_purine ) { return ChemOnomatopist::Chain::Bicycle::Purine->new( $graph, @cycles ); } elsif( !$nbenzene ) { # Find the senior cycle my @candidates = map { $_->candidates } map { ChemOnomatopist::Chain::Monocycle->new( $_->graph, $_->vertices ) } $self->cycles; for my $rule ( # P-25.3.2.4 (a): Senior heteroatom according to specific seniority order \&rule_most_senior_heteroatom, # TODO: P-25.3.2.4 (b): Concerns fusions of more than two rings # P-25.3.2.4 (c): Second ring has to be larger \&ChemOnomatopist::rule_longest_chains, # P-25.3.2.4 (d): Greater number of heteroatoms of any kind \&ChemOnomatopist::rule_most_heteroatoms, # P-25.3.2.4 (e): Greater variety of heteroatoms \&rule_greatest_variety_of_heteroatoms, # P-25.3.2.4 (f): Greater number of most senior heteroatoms \&ChemOnomatopist::rule_greatest_number_of_most_senior_heteroatoms, # TODO: P-25.3.2.4 (g): Concerns fusions of more than two rings # P-25.3.2.4 (h): Lower locants for heteroatoms \&ChemOnomatopist::rule_lowest_numbered_heteroatoms, # P-25.3.2.4 (i): Lower locants for senior heteroatoms \&ChemOnomatopist::rule_lowest_numbered_most_senior_heteroatoms, # TODO: P-25.3.2.4 (j): Concerns fusions of more than two rings ) { my @candidates_now = $rule->( @candidates ); if( @candidates_now == 1 ) { @candidates = @candidates_now; last; } elsif( @candidates ) { # CHECKME: This looks strange @candidates = @candidates_now; } else { last; } } # Here the "winning" ring is selected my $chain = shift @candidates; # Making the "winning" ring the first if( set( $chain->vertices ) == set( $self->{cycles}[1]->vertices ) ) { $self->{cycles} = [ reverse $self->cycles ]; } # Construct the candidates to determine the numbering order @candidates = ( $self, $self->flipped_horizontally, $self->flipped_vertically, $self->flipped_horizontally->flipped_vertically ); # Establish the order for my $rule ( # P-25.3.3.1.2 (a): Lower locants for heteroatoms \&ChemOnomatopist::rule_lowest_numbered_heteroatoms, # P-25.3.3.1.2 (b): Lower locants for senior heteroatoms \&ChemOnomatopist::rule_lowest_numbered_most_senior_heteroatoms, # P-25.3.3.1.2 (c): Lower locants for fusion carbon atoms \&rule_lowest_numbered_fusion_carbons, # P-25.3.3.1.2 (d): Lower locants for fusion heteroatoms (rather than nonfusion) \&rule_lowest_numbered_fusion_heteroatoms, # TODO: P-25.3.3.1.2 (e): Lower locants for interior heteroatom # P-25.3.3.1.2 (f): Lower locants for indicated hydrogen atoms \&ChemOnomatopist::rule_lowest_numbered_indicated_hydrogens, ) { my @candidates_now = $rule->( @candidates ); if( @candidates_now == 1 ) { @candidates = @candidates_now; last; } elsif( @candidates ) { # CHECKME: This looks strange @candidates = @candidates_now; } else { last; } } $self->{vertices} = [ (shift @candidates)->vertices ]; # FIXME: Simply return instead of self } elsif( $nbenzene == 1 ) { # Numbering has to start from cycle other than benzene if( $cycles[0]->is_benzene ) { @cycles = reverse @cycles; $self->{cycles} = \@cycles; } my( $chain ) = sort { ChemOnomatopist::Chain::Monocycle::_cmp( $a, $b ) } ( $cycles[0], $cycles[0]->flipped ); if( $chain != $cycles[0] ) { @cycles = map { $_->flipped } @cycles; $self->{cycles} = \@cycles; } $self->_adjust_vertices_to_cycles; # CHECKME: Is this needed? } return $self; } sub candidates() { my( $self ) = @_; my @chains = ( $self ); if( $self->is_naphthalene ) { # Generates all variants push @chains, $self->flipped_horizontally, $self->flipped_vertically, $self->flipped_horizontally->flipped_vertically; for (1..3) { $chains[$_]->{candidate_for} = $self; } } return @chains; } sub flipped_horizontally() { my( $self ) = @_; my $copy = $self->copy; my @vertices = reverse $copy->vertices; push @vertices, shift @vertices; $copy->{vertices} = \@vertices; return $copy; } sub flipped_vertically() { my( $self ) = @_; my $copy = $self->copy; my $cycle = first { set( $_->vertices )->has( $copy->{vertices}[0] ) } $copy->cycles; my @vertices = reverse $copy->vertices; for (1..$cycle->length-2) { unshift @vertices, pop @vertices; } $copy->{vertices} = \@vertices; return $copy; } sub copy() { my( $self ) = @_; return bless { graph => $self->graph, cycles => [ $self->cycles ], vertices => [ $self->vertices ], parent => $self->parent }, ChemOnomatopist::Chain::Bicycle::; } sub cycles() { my( $self ) = @_; return @{$self->{cycles}}; } sub fusion_positions() { my( $self ) = @_; my $bridge = set( map { @{$_->{vertices}}[-2..-1] } $self->cycles ); my @vertices = $self->vertices; return grep { $bridge->has( $vertices[$_] ) } 0..$#vertices; } sub fusion_carbon_positions() { my( $self ) = @_; return grep { ChemOnomatopist::element( $self->{vertices}[$_] ) eq 'C' } $self->fusion_positions; } sub fusion_heteroatom_positions() { my( $self ) = @_; return grep { ChemOnomatopist::element( $self->{vertices}[$_] ) ne 'C' } $self->fusion_positions; } sub parent(;$) { my( $self, $parent ) = @_; my $old_parent = $self->SUPER::parent( $parent ); return $old_parent unless $parent; return $old_parent if $old_parent && $parent == $old_parent; if( $self->is_naphthalene ) { my( $chain ) = ChemOnomatopist::filter_chains( $self->candidates ); $self->{vertices} = [ $chain->vertices ]; } return $old_parent; } sub has_form($$) { my( $class, $graph ) = @_; my %degrees = map { $graph->degree( $_ ) => 1 } $graph->vertices; return '' unless join( ',', sort keys %degrees ) eq '2,3'; # Ensure subgraph has three paths between the three-degreed vertices my @d3 = grep { $graph->degree( $_ ) == 3 } $graph->vertices; return '' unless @d3 == 2; $graph = $graph->copy; my @lengths; for (1..3) { my @path = $graph->SP_Dijkstra( @d3 ); return '' unless @path; $graph->delete_path( @path ); push @lengths, scalar @path; } # Ensure a single path directly joins the three-degreed vertices return '' unless @lengths == 3; return '' unless scalar( grep { $_ > 2 } @lengths ) == 2; return 1; } # Tells whether the outer bonds of the bicycle qualify as aromatic sub is_aromatic() { my( $self ) = @_; my @outer_vertices; for ($self->cycles) { my @vertices = $_->vertices; pop @vertices; push @outer_vertices, @vertices; } return ChemOnomatopist::Chain::Circular->new( $self->graph, @outer_vertices )->is_aromatic; } sub is_hydrocarbon() { my( $self ) = @_; return all { $_->is_hydrocarbon } $self->cycles; } sub is_naphthalene() { my( $self ) = @_; return $self->is_hydrocarbon && all { $_->length == 6 } $self->cycles; } sub is_purine() { my( $self ) = @_; return '' unless $self->number_of_heteroatoms == 4; return '' unless join( ',', uniq $self->heteroatoms ) eq 'N'; my @cycles = $self->cycles; my $pyrimidine = first { $_->length == 6 } @cycles; my $imidazole = first { $_->length == 5 } @cycles; return '' unless $pyrimidine && $imidazole; return '' unless join( ',', $imidazole->heteroatom_positions ) eq '0,2'; return '' unless join( ',', $pyrimidine->heteroatom_positions ) eq '0,2' || join( ',', $pyrimidine->heteroatom_positions ) eq '1,3'; return 1; } sub needs_indicated_hydrogens() { 1 } sub needs_heteroatom_locants() { my( $self ) = @_; return $self->suffix =~ /^benzo/; } sub needs_heteroatom_names() { my( $self ) = @_; return $self->needs_heteroatom_locants && all { !$_->is_Hantzsch_Widman } $self->cycles; } sub needs_substituent_locants() { 1 } sub indicated_hydrogens_part() { my( $self ) = @_; my $part = ChemOnomatopist::Name->new; return $part unless $self->number_of_indicated_hydrogens; return $self->SUPER::indicated_hydrogens_part if $self->is_naphthalene; # Use parent procedure if all the indicated hydrogens are confined to a single ring return $self->SUPER::indicated_hydrogens_part if any { !$_->number_of_indicated_hydrogens } $self->cycles; if( $self->number_of_indicated_hydrogens && $self->number_of_indicated_hydrogens < $self->length ) { $part->append_locants( map { $_ . 'H' } $self->locants( $self->indicated_hydrogens ) ); } return $part; } sub prefix() { my( $self ) = @_; my $name = $self->suffix; $name = ChemOnomatopist::Name->new( $name ) unless blessed $name; $name->pop_e; if( $self->parent ) { # FIXME: Not stable for naphthalene my @vertices = $self->vertices; my $position = first { $self->graph->has_edge( $self->parent, $vertices[$_] ) } 0..$#vertices; die "unknown locant in multicyclic compound\n" unless defined $position; $name->append_substituent_locant( $self->locants( $position ) ); } $name .= 'yl'; return $name; } sub suffix() { my( $self ) = @_; if( $self->is_hydrocarbon ) { # FIXME: Check if aromatic, but with caution, as substitutions will break aromaticity my $cycle_sizes = join ',', sort map { $_->length } $self->cycles; if( exists $hydrocarbons_by_size{$cycle_sizes} ) { return ChemOnomatopist::Name::Part::Stem->new( $hydrocarbons_by_size{$cycle_sizes} )->to_name; } if( $cycle_sizes =~ /^(\d+),\1$/ ) { my $name = ChemOnomatopist::alkane_chain_name( $1 ) . 'alene'; return ChemOnomatopist::Name::Part::Stem->new( $name )->to_name; } } my @SMILES = map { $_->backbone_SMILES } $self->cycles; print STDERR "bicycle SMILES: @SMILES\n" if $ChemOnomatopist::DEBUG; my $retained = find_retained( @SMILES ); return ChemOnomatopist::Name::Part::Stem->new( $retained->[2] )->to_name if $retained; if( any { $_->is_benzene } $self->cycles ) { my $other = first { !$_->is_benzene } $self->cycles; my $name = ChemOnomatopist::Name->new( 'benzo' ); if( $other->length == 6 && $other->is_monoreplaced && join( '', $other->heteroatoms ) =~ /^(O|S|Se|Te)$/ && join( '', $other->heteroatom_positions ) < 4 ) { # Names according to BBv3 P-25.2.1, Table 2.8, (23) and (24) my( $element ) = $other->heteroatoms; if( $element ne 'O' ) { $name .= $elements{$element}->{prefix}; $name->[-1] =~ s/a$/o/; } return $name . 'pyran'; } else { my $other_name = $other->suffix; if( $other_name->starts_with_locant ) { # Locants are moved to front unshift @$name, shift @$other_name; } $name->[-1] =~ s/o$// if $other_name->[0] =~ /^a/; return $name . $other_name; } } # Fusion naming according to BBv2 P-25.3.1.3 # Find the bridge vertices my @bridge = $self->{cycles}[0]->vertices; @bridge = @bridge[-2..-1]; # Find autosymmetric equivalents having the least locants for the bridge my @equiv_A = $self->{cycles}[0]->autosymmetric_equivalents; my @equiv_B = $self->{cycles}[1]->autosymmetric_equivalents; my $min_A = min map { $_->vertex_ids( @bridge ) } @equiv_A; @equiv_A = grep { min( $_->vertex_ids( @bridge ) ) == $min_A } @equiv_A; my $min_B = min map { $_->vertex_ids( @bridge ) } @equiv_B; @equiv_B = grep { min( $_->vertex_ids( @bridge ) ) == $min_B } @equiv_B; my $fusion; if( $self->{cycles}[1]->is_homogeneous && $self->{cycles}[1]->number_of_branches == 2 ) { $fusion = '['; } elsif( @equiv_A > 1 || @equiv_B > 1 ) { # At least one of the rings has mirror symmetry ("flip-symmetric"), thus numeric order is ascending $fusion = '[' . ($min_B+1) . ',' . ($min_B+2) . '-'; } else { # Rings are rigid, thus numeric order has to be derived my @order_A = $equiv_A[0]->vertex_ids( @bridge ); my @order_B = $equiv_B[0]->vertex_ids( @bridge ); if( ($order_A[0] <=> $order_A[1]) == ($order_B[0] <=> $order_B[1]) ) { # Ring atoms are encountered in the same order in both of the rings $fusion = '[' . ($min_B+1) . ',' . ($min_B+2); } else { # Ring atom orders differ $fusion = '[' . ($min_B+2) . ',' . ($min_B+1); } $fusion .= '-'; } $fusion .= chr( 97 + $min_A ) . ']'; my @ideal = map { ChemOnomatopist::Chain::Monocycle->new( $_->graph, $_->vertices ) } $self->cycles; my $name_A = $ideal[1]->suffix; $name_A =~ s/^\d+H-//; # Retained prefixes from BBv3 P-25.3.2.2.3 $name_A = 'anthra' if $name_A eq 'anthracene'; $name_A = 'naphth' if $name_A eq 'naphthalene'; $name_A = 'benz' if $name_A eq 'benzene'; $name_A = 'phenanthr' if $name_A eq 'phenanthrene'; $name_A = 'fur' if $name_A eq 'furan'; $name_A = 'imidaz' if $name_A eq 'imidazole'; $name_A = 'pyrid' if $name_A eq 'pyridine'; $name_A = 'pyrimid' if $name_A eq 'pyrimidine'; $name_A = 'thien' if $name_A eq 'thiophene'; $name_A = ChemOnomatopist::Name->new( $name_A ) unless blessed $name_A; $name_A->pop_e; if( $name_A->ends_with_alkane_an_suffix ) { pop @$name_A; $name_A .= 'a'; } else { $name_A .= 'o'; } my $name = ChemOnomatopist::Name->new; $name .= $name_A; $name .= ChemOnomatopist::Name::Part::Fusion->new( $fusion ); my $name_B = $ideal[0]->suffix; $name_B->[0] =~ s/\d+H-//; $name .= $name_B; $name->bracket_numeric_locants; return $name; } sub rule_most_senior_heteroatom { my( @chains ) = @_; # This order is taken from BBv2 P-25.3.2.4 (a) and is different from order in %elements my @element_order = qw( N F Cl Br I O S Se Te P As Sb Bi Si Ge Sn Pb B Al Ga In Tl ); my %element_order = map { $element_order[$_] => $_ } 0..$#element_order; my( $max_value ) = sort { $element_order{$a} <=> $element_order{$b} } grep { exists $element_order{$_} } map { $_->heteroatoms } @chains; return @chains unless $max_value; return grep { any { $_ eq $max_value } $_->heteroatoms } @chains; } sub rule_greatest_variety_of_heteroatoms { all_max { scalar uniq $_->heteroatoms } @_ } sub rule_lowest_numbered_fusion_carbons { all_min { ChemOnomatopist::Comparable::Array::Numeric->new( $_->fusion_carbon_positions ) } @_ } sub rule_lowest_numbered_fusion_heteroatoms { all_min { ChemOnomatopist::Comparable::Array::Numeric->new( $_->fusion_heteroatom_positions ) } @_ } sub _adjust_vertices_to_cycles() { my( $self ) = @_; my @cycles = $self->cycles; $self->{vertices} = []; push @{$self->{vertices}}, $cycles[0]->vertices; pop @{$self->{vertices}}; push @{$self->{vertices}}, $cycles[1]->vertices; pop @{$self->{vertices}}; return $self; } sub find_retained { my @SMILES = map { s/[=:]//g; uc } @_; return first { ($_->[0] eq $SMILES[0] && $_->[1] eq $SMILES[1]) || ($_->[0] eq $SMILES[1] && $_->[1] eq $SMILES[0]) } @names; } 1; ChemOnomatopist-0.10.0/lib/ChemOnomatopist/Chain/Bicycle/000077500000000000000000000000001463750375500231735ustar00rootroot00000000000000ChemOnomatopist-0.10.0/lib/ChemOnomatopist/Chain/Bicycle/Purine.pm000066400000000000000000000020211463750375500247660ustar00rootroot00000000000000package ChemOnomatopist::Chain::Bicycle::Purine; # ABSTRACT: Purine chain # VERSION use strict; use warnings; use List::Util qw( first ); use parent ChemOnomatopist::Chain::Bicycle::; sub new { my( $class, $graph, @cycles ) = @_; my $imidazole = first { $_->length == 5 } @cycles; my $pyrimidine = first { $_->length == 6 } @cycles; if( ChemOnomatopist::element( $pyrimidine->{vertices}[0] ) eq 'N' ) { $pyrimidine = $pyrimidine->flipped; } if( $pyrimidine->{vertices}[-1] != $imidazole->{vertices}[-1] ) { $imidazole = $imidazole->flipped; } my @vertices = ( @{$pyrimidine->{vertices}}[1..5], $pyrimidine->{vertices}[0], @{$imidazole->{vertices}}[0..2] ); return bless { graph => $graph, cycles => \@cycles, vertices => \@vertices }, $class; } sub is_purine() { 1 } sub locants(@) { my $self = shift; return map { $_ + 1 } @_; } sub suffix() { ChemOnomatopist::Name->new( 'purine' ) } 1; ChemOnomatopist-0.10.0/lib/ChemOnomatopist/Chain/Carboxamide.pm000066400000000000000000000024671463750375500244060ustar00rootroot00000000000000package ChemOnomatopist::Chain::Carboxamide; # ABSTRACT: Carboxamide chain # VERSION use strict; use warnings; use parent ChemOnomatopist::Chain::; use ChemOnomatopist::Name; use List::Util qw( first ); sub new { my( $class, $graph, $amide, $C, $chain ) = @_; $chain->parent( $C ); return bless { graph => $graph, chain => $chain, vertices => [ $amide, $C, $chain->vertices ] }, $class; } sub needs_heteroatom_locants() { '' } sub needs_heteroatom_names() { '' } sub locants(@) { my $self = shift; return map { $_ > 1 ? $_ - 1 : $_ ? '?' : 'N' } @_; } # FIXME: This is a source of possible failures sub prefix() { ChemOnomatopist::Name->new( 'benzamido' ) } sub suffix() { my( $self ) = @_; return 'benz' if $self->{chain}->is_benzene; my $suffix = $self->{chain}->suffix; if( !$self->{chain}->isa( ChemOnomatopist::Chain::Monocycle:: ) || $self->{chain}->needs_substituent_locants ) { my @vertices = $self->{chain}->vertices; my $locant = first { $self->graph->has_edge( $self->{vertices}[1], $vertices[$_] ) } 0..$#vertices; $suffix->append_locants( $locant + 1 ); } $suffix .= 'carbo'; $suffix .= 'x' if $self->{vertices}[0]{ketone}->element eq 'O'; return $suffix; } 1; ChemOnomatopist-0.10.0/lib/ChemOnomatopist/Chain/Chalcogen.pm000066400000000000000000000017061463750375500240460ustar00rootroot00000000000000package ChemOnomatopist::Chain::Chalcogen; # ABSTRACT: Chalcogen chain # VERSION use strict; use warnings; use ChemOnomatopist::Name; use Graph::Traversal::DFS; use List::Util qw( first ); use parent ChemOnomatopist::Chain::; sub new { my( $class, $graph, $parent, @vertices ) = @_; my $subgraph = $graph->subgraph( @vertices ); @vertices = Graph::Traversal::DFS->new( $subgraph, start => first { $subgraph->degree( $_ ) == 1 } @vertices )->dfs; return bless { graph => $graph, vertices => \@vertices }, $class; } sub needs_heteroatom_names() { '' } sub needs_suffix_locant() { '' } my %suffix = ( O => 'oxidane', S => 'sulfane', Se => 'selane', Te => 'tellane' ); sub suffix() { my( $self ) = @_; my $name = ChemOnomatopist::Name->new; $name .= ChemOnomatopist::IUPAC_numerical_multiplier( $self->length ); $name .= $suffix{$self->{vertices}[0]->{symbol}}; return $name; } 1; ChemOnomatopist-0.10.0/lib/ChemOnomatopist/Chain/Circular.pm000066400000000000000000000162161463750375500237310ustar00rootroot00000000000000package ChemOnomatopist::Chain::Circular; # ABSTRACT: Chain whose first and last members are connected # VERSION use strict; use warnings; use ChemOnomatopist; use ChemOnomatopist::Chain; use ChemOnomatopist::Chain::Monocycle; use ChemOnomatopist::Elements qw( %elements ); use ChemOnomatopist::Name::Part::NondetachablePrefix; use ChemOnomatopist::Util qw( circle_permutations ); use ChemOnomatopist::Util::SMILES qw( cycle_SMILES ); use Chemistry::OpenSMILES qw( is_single_bond ); use List::Util qw( all any uniq ); use Scalar::Util qw( blessed ); use Set::Object qw( set ); use parent ChemOnomatopist::Chain::; sub new { my( $class, $graph, @vertices ) = @_; return bless { graph => $graph, vertices => \@vertices }, $class; } # Selecting the candidate with the lowest alphabetical order sub backbone_SMILES() { my( $self ) = @_; my( $SMILES ) = sort map { cycle_SMILES( $self->graph, @$_ ) } circle_permutations( $self->vertices ); return $SMILES; } sub is_Hantzsch_Widman() { my( $self ) = @_; return $self->length >= 3 && $self->length <= 10 && # "no more than ten ring members" $self->number_of_heteroatoms && # "containing one or more heteroatoms" any { $_->{symbol} =~ /^[cC]$/ } $self->vertices; } # FIXME: What to do with furan and others? sub is_aromatic() { all { $_ eq ':' } $_[0]->bonds } sub is_benzene() { my( $self ) = @_; return $self->is_aromatic && $self->is_homogeneous && $self->length == 6; } sub is_heterocycle() { $_[0]->number_of_heteroatoms > 0 } sub is_homogeneous() { my( $self ) = @_; if( any { blessed $_ } $self->vertices ) { die "cannot process cycles with groups as their members\n"; } my @elements = map { $_->{symbol} } $self->vertices; my @bonds = $self->bonds; my( $element ) = @elements; my( $bond ) = @bonds; return '' if any { $_ ne $element } @elements; return 1 if $self->is_aromatic; return '' if any { $_ ne $bond } @bonds; return 1; } sub is_monoreplaced() { $_[0]->number_of_heteroatoms == 1 } sub needs_indicated_hydrogens() { '' } sub needs_multiple_bond_locants() { my( $self ) = @_; return 1 if $self->number_of_charges; return 1 if $self->number_of_branches || $self->parent; return 1 if scalar( uniq map { $_->{symbol} } $self->vertices ) > 1; return $self->number_of_multiple_bonds > 1 && $self->number_of_multiple_bonds < $self->length; } sub needs_substituent_locants() { my( $self ) = @_; # BBv2 P-14.3.4.2 (c): monosubstituted homogeneous cycles do not need locants return '' if $self->is_homogeneous && $self->number_of_branches == 1 && !$self->number_of_isotopes; # If there is only one kind of substituents, locants are not needed if( $self->is_homogeneous && $self->number_of_branches >= $self->max_valence - 1 ) { return '' if uniq( map { "$_" } map { @$_ } $self->locant_names ) == 1; } return 1; } sub needs_charge_locants() { my( $self ) = @_; return 1 if $self->number_of_charges > 1; return 1 if $self->number_of_branches; return !$self->is_homogeneous; } sub needs_isotope_locants() { my( $self ) = @_; return 1 if $self->number_of_isotopes > 1; return 1 if $self->number_of_branches; return !$self->is_homogeneous; } sub needs_suffix_locant() { $_[0]->needs_substituent_locants } sub bonds() { my( $self ) = @_; my @bonds = $self->SUPER::bonds; my $graph = $self->graph; my @vertices = $self->vertices; if( $graph->has_edge_attribute( $vertices[0], $vertices[-1], 'bond' ) ) { push @bonds, $graph->get_edge_attribute( $vertices[0], $vertices[-1], 'bond' ); } else { push @bonds, '-'; } return @bonds; } sub indicated_hydrogens() { my( $self ) = @_; my @positions; my @vertices = $self->vertices; my $graph = $self->graph; for my $i (0..$#vertices) { # Rough interpretation of BBv2 P-14.7.1 and P-22.2.2.1.4 next unless $vertices[$i]->{hcount}; my $element = ChemOnomatopist::element( $vertices[$i] ); if( $element eq 'C' ) { if( $vertices[$i]->{hcount} == 2 || ($graph->degree( $vertices[$i] ) == 3 && $vertices[$i]->{hcount} == 1 ) ) { push @positions, $i; } } elsif( $element eq 'N' && $vertices[$i]->{hcount} == 1 ) { push @positions, $i; } elsif( $element eq 'Si' && $vertices[$i]->{hcount} == 2 ) { push @positions, $i; } } return @positions; } # Implemented according to BBv2 P-25.3.3.1.1 sub locants(@) { my $self = shift; my @vertices = $self->vertices; my $graph = $self->graph->subgraph( @vertices ); return map { $_ + 1 } @_ if $graph->vertices == $graph->edges; my %locant_map; my $pos = 0; my $letter = 'a'; for my $i (0..$#vertices) { if( $graph->degree( $vertices[$i] ) == 2 || !ChemOnomatopist::element( $vertices[$i] ) || ChemOnomatopist::element( $vertices[$i] ) ne 'C' ) { $pos++; $locant_map{$i} = $pos; $letter = 'a'; } else { $locant_map{$i} = $pos . $letter; $letter++; } } return map { $locant_map{$_} } @_; } # In aromatic systems Kekule bonds have to be calculated, otherwise seniority rules may fail. sub number_of_double_bonds() { $_[0]->is_aromatic ? int( $_[0]->length / 2 ) : $_[0]->SUPER::number_of_double_bonds } sub number_of_multiple_bonds() { $_[0]->is_aromatic ? $_[0]->number_of_double_bonds : $_[0]->SUPER::number_of_multiple_bonds } sub number_of_rings() { my( $self ) = @_; return scalar $self->cycles if $self->can( 'cycles' ); return 1; } sub indicated_hydrogens_part() { my( $self ) = @_; my $part = ChemOnomatopist::Name->new; return $part unless $self->number_of_indicated_hydrogens; if( $self->needs_indicated_hydrogens ) { my @indicated_hydrogens = $self->indicated_hydrogens; my $single_H = shift @indicated_hydrogens if @indicated_hydrogens % 2; if( @indicated_hydrogens ) { if( $self->number_of_indicated_hydrogens < $self->length ) { $part->append_locants( $self->locants( @indicated_hydrogens ) ); } $part .= ChemOnomatopist::IUPAC_numerical_multiplier( scalar @indicated_hydrogens ); $part->[-1] .= 'a' unless $part =~ /[ai]$/; $part .= 'hydro'; } if( defined $single_H ) { $part->append_locants( map { $_ . 'H' } $self->locants( $single_H ) ); } } return $part; } sub _cmp_instances { my( $A, $B ) = @_; # BBv2 P-44.2.1 (a) if( $A->is_heterocycle <=> $B->is_heterocycle ) { return $B->is_heterocycle <=> $A->is_heterocycle; } # BBv2 P-44.2.1 (b) if( set( $A->heteroatoms )->has( 'N' ) <=> set( $B->heteroatoms )->has( 'N' ) ) { return set( $B->heteroatoms )->has( 'N' ) <=> set( $A->heteroatoms )->has( 'N' ); } # BBv2 P-44.2.1 (e) return $B->length <=> $A->length if $A->length <=> $B->length; # BBv2 P-44.2.1 (f) return scalar( $B->heteroatoms ) <=> scalar( $A->heteroatoms ); } 1; ChemOnomatopist-0.10.0/lib/ChemOnomatopist/Chain/Ether.pm000066400000000000000000000062641463750375500232360ustar00rootroot00000000000000package ChemOnomatopist::Chain::Ether; # ABSTRACT: Ether chain # VERSION use strict; use warnings; use parent ChemOnomatopist::Chain::; use ChemOnomatopist; use ChemOnomatopist::Group::Ether; use ChemOnomatopist::Name; use List::Util qw( first ); use Scalar::Util qw( blessed ); sub new { my( $class, $graph, $parent, @vertices ) = @_; my $self = { vertices => \@vertices, graph => $graph }; $self->{parent} = $parent if $parent; return bless $self, $class; } sub ether_position() { my( $self ) = @_; my @vertices = $self->vertices; return first { blessed $vertices[$_] && $vertices[$_]->isa( ChemOnomatopist::Group::Ether:: ) } 0..$#vertices; } sub heteroatom_positions() { my( $self ) = @_; return @{$self->{heteroatom_positions}} if $self->{heteroatom_positions}; my @vertices = $self->vertices; my $ether_position = $self->ether_position; my @heteroatom_positions; for (0..$#vertices) { next if $_ == $ether_position; next if ChemOnomatopist::is_element( $vertices[$_], 'C' ); push @heteroatom_positions, $_; } $self->{heteroatom_positions} = \@heteroatom_positions; return @heteroatom_positions; } sub needs_heteroatom_locants() { 1 } sub needs_heteroatom_names() { 1 } sub needs_substituent_locants() { '' } sub prefix() { my( $self ) = @_; my @vertices = $self->vertices; return 'oxy' if @vertices == 1; my $cut_position = $self->ether_position; if( $cut_position ) { my @chains = ( ChemOnomatopist::Chain->new( $self->graph, $self->parent, reverse @vertices[0..$cut_position-1] ), ChemOnomatopist::Chain->new( $self->graph, $self->parent, @vertices[$cut_position+1..$#vertices] ) ); my @prefixes = map { $_->pop_yl } map { $_->prefix } @chains; my $name = ChemOnomatopist::Name->new; $name->append_locants( $cut_position ) if $chains[0]->needs_substituent_locants; $name->append( $prefixes[1] ); $name->append( 'oxy' ); $name->append( $prefixes[0] ); return $name; } else { my $chain = ChemOnomatopist::Chain->new( $self->graph, @vertices ); my $name = $chain->prefix->pop_yl; return $name . 'oxy'; } } sub suffix() { my( $self ) = @_; my @vertices = $self->vertices; my $cut_position = $self->ether_position; my @chains = ( ChemOnomatopist::Chain->new( $self->graph, $self->parent, reverse @vertices[0..$cut_position-1] ), ChemOnomatopist::Chain->new( $self->graph, $self->parent, @vertices[$cut_position+1..$#vertices] ) ); @chains = reverse @chains if $chains[0]->length > $chains[1]->length; my $name = $chains[0]->prefix->pop_yl; $name .= 'oxy'; $name .= $chains[1]->suffix->pop_yl; return $name; } 1; ChemOnomatopist-0.10.0/lib/ChemOnomatopist/Chain/Fluorene.pm000066400000000000000000000050171463750375500237410ustar00rootroot00000000000000package ChemOnomatopist::Chain::Fluorene; # ABSTRACT: Fluorene or its derivative # VERSION use strict; use warnings; use parent ChemOnomatopist::Chain::Circular::; use ChemOnomatopist::Name; use ChemOnomatopist::Util::Graph qw( graph_without_edge_attributes merge_graphs ); use Graph::Nauty qw( are_isomorphic ); use Graph::Traversal::DFS; use Graph::Undirected; use List::Util qw( first ); use Set::Object qw( set ); sub new { my( $class, $graph, @cycles ) = @_; my $subgraph = $graph->subgraph( map { $_->vertices } @cycles ); my( $cyclopentane, @benzenes ) = sort { $a->length <=> $b->length } @cycles; my( $apex ) = (set( $cyclopentane->vertices ) - set( $benzenes[0]->vertices ) - set( $benzenes[1]->vertices ))->members; $subgraph->delete_vertices( $apex, $subgraph->neighbours( $apex ) ); my $start = first { $subgraph->degree( $_ ) == 1 } $subgraph->vertices; $subgraph = $graph->subgraph( map { $_->vertices } @cycles ); $subgraph->delete_edge( (set( $cyclopentane->vertices ) * set( $benzenes[0]->vertices ))->members ); $subgraph->delete_edge( (set( $cyclopentane->vertices ) * set( $benzenes[1]->vertices ))->members ); $subgraph->delete_edge( $start, (set( $subgraph->neighbours( $start ) ) * set( $cyclopentane->vertices ))->members ); my @vertices = Graph::Traversal::DFS->new( $subgraph, start => $start )->dfs; return bless { graph => $graph, vertices => \@vertices }, $class; } sub has_form($$) { my( $class, $graph ) = @_; return '' unless $graph->vertices == 13; return are_isomorphic( graph_without_edge_attributes( $graph ), $class->ideal_graph, sub { ChemOnomatopist::element( $_[0] ) } ); } sub ideal_graph($) { my( $class ) = @_; my @graphs; for (0..1) { my $graph = Graph::Undirected->new( refvertexed => 1 ); $graph->add_cycle( map { { symbol => 'C' } } 1..6 ); push @graphs, $graph; } my $graph = merge_graphs( @graphs ); # Pick an edge from each graph my( $A ) = $graphs[0]->edges; my( $B ) = $graphs[1]->edges; # Join a pair of atoms with an edge $graph->add_edge( $A->[0], $B->[0] ); # Add a longer arc between other two atoms $graph->add_path( $A->[1], { symbol => 'C' }, $B->[1] ); return $graph; } sub number_of_rings() { 3 } sub prefix() { ChemOnomatopist::Name->new( 'fluorene' ) } sub suffix() { ChemOnomatopist::Name->new( 'fluorene' ) } 1; ChemOnomatopist-0.10.0/lib/ChemOnomatopist/Chain/FromHalves.pm000066400000000000000000000070751463750375500242360ustar00rootroot00000000000000package ChemOnomatopist::Chain::FromHalves; # ABSTRACT: Chain formed from two halves # VERSION use strict; use warnings; use parent ChemOnomatopist::Chain::; use Chemistry::OpenSMILES qw( is_single_bond ); use Clone qw( clone ); use List::Util qw( all sum sum0 ); sub new { my( $class, @halves ) = @_; # FIXME: This check might be too weak die "halves must belong to the same graph\n" if $halves[0]->graph ne $halves[1]->graph; return bless { halves => \@halves }, $class; } sub graph() { my( $self ) = @_; return $self->{halves}[0]->graph; } sub halves() { my( $self ) = @_; return @{$self->{halves}}; } sub shares_start() { !defined $_[0]->{halves}[0]{other_center} } sub branch_positions() { my( $self ) = @_; my @half0_positions = $self->{halves}[0]->branch_positions; my @half1_positions = $self->{halves}[1]->branch_positions; # If path parts start at the same atom, its attachments get duplicated @half1_positions = grep { $_ } @half1_positions if $self->shares_start; return ( map { $self->{halves}[0]->length - $_ - 1 } reverse @half0_positions ), ( map { $self->{halves}[1]->length + $_ - $self->shares_start } @half1_positions ); } sub most_senior_group_positions() { my( $self ) = @_; my @half0_positions = $self->{halves}[0]->most_senior_group_positions; my @half1_positions = $self->{halves}[1]->most_senior_group_positions; # If path parts start at the same atom, its attachments get duplicated @half1_positions = grep { $_ } @half1_positions if $self->shares_start; return ( map { $self->{halves}[0]->length - $_ - 1 } reverse @half0_positions ), ( map { $self->{halves}[1]->length + $_ - $self->shares_start } @half1_positions ); } sub bonds() { my( $self ) = @_; my @bonds = reverse $self->{halves}[0]->bonds; if( !$self->shares_start ) { my @centers = map { $_->{other_center} } $self->halves; if( $self->graph->has_edge_attribute( @centers, 'bond' ) ) { push @bonds, $self->graph->get_edge_attribute( @centers, 'bond' ); } else { push @bonds, '-'; } } push @bonds, $self->{halves}[1]->bonds; return @bonds; } sub locant_names() { my( $self ) = @_; return reverse( $self->{halves}[0]->locant_names ), $self->{halves}[1]->locant_names; } sub isotopes() { my( $self ) = @_; # Cloning is needed in order not to affect the original arrays my @half0_isotopes = map { clone $_ } $self->{halves}[0]->isotopes; my @half1_isotopes = map { clone $_ } $self->{halves}[1]->isotopes; @half1_isotopes = grep { $_->{index} } @half1_isotopes if $self->shares_start; for (@half0_isotopes) { $_->{index} = $self->{halves}[0]->length - $_->{index} - 1; ( $_->{locant} ) = $self->locants( $_->{index} ); } for (@half1_isotopes) { $_->{index} = $self->{halves}[1]->length + $_->{index} - $self->shares_start; ( $_->{locant} ) = $self->locants( $_->{index} ); } return my @isotopes = ( reverse( @half0_isotopes ), @half1_isotopes ); } # Not sure why this has to be overriden sub number_of_branches() { my( $self ) = @_; return int sum0 map { $_->number_of_branches } $self->halves; } sub vertices() { my( $self ) = @_; my @A = $self->{halves}[0]->vertices; my @B = $self->{halves}[1]->vertices; # If there is only one center atom, it appears in both chains shift @B if $self->shares_start; my @vertices = ( reverse( @A ), @B ); # Otherwise scalar is returned sometimes return @vertices; } 1; ChemOnomatopist-0.10.0/lib/ChemOnomatopist/Chain/Imine.pm000066400000000000000000000023431463750375500232220ustar00rootroot00000000000000package ChemOnomatopist::Chain::Imine; # ABSTRACT: Imine chain # VERSION use strict; use warnings; use ChemOnomatopist; use ChemOnomatopist::Name; use List::Util qw( first ); use Scalar::Util qw( blessed ); sub AUTOLOAD { our $AUTOLOAD; my $call = $AUTOLOAD; $call =~ s/.*:://; return if $call eq 'DESTROY'; my $self = shift; return $self->{chain}->can( $call )->( $self->{chain}, @_ ); } sub new { my( $class, $graph, $chain, $amine ) = @_; return bless { graph => $graph, chain => $chain, amine => $amine }; } sub vertices() { my $self = shift; my @vertices = ( $self->{amine}, $self->{chain}->vertices ); return @vertices; } sub locants(@) { my $self = shift; return map { $_ ? $_ : 'N' } @_; } sub needs_substituent_locants() { 1 } sub suffix() { my( $self ) = @_; my $suffix = $self->{chain}->suffix; return $suffix unless $self->{chain}->needs_suffix_locant; return $suffix if $self->{chain}->length == 2; # Ad-hoc fix for ethanamines my $neighbour = first { $self->graph->has_edge( $self->{amine}, $_ ) } $self->{chain}->vertices; return $suffix->append_locants( $self->{chain}->locants( $self->vertex_ids( $neighbour ) ) ); } 1; ChemOnomatopist-0.10.0/lib/ChemOnomatopist/Chain/Monocycle.pm000066400000000000000000000307661463750375500241230ustar00rootroot00000000000000package ChemOnomatopist::Chain::Monocycle; # ABSTRACT: Monocyclic group # VERSION use strict; use warnings; use parent ChemOnomatopist::Chain::Circular::; use ChemOnomatopist; use ChemOnomatopist::Elements qw( %elements ); use ChemOnomatopist::Group::Sulfinyl; use ChemOnomatopist::Group::Sulfonyl; use ChemOnomatopist::Name; use ChemOnomatopist::Util qw( all_min circle_permutations cmp_arrays ); use List::Util qw( all first ); use Scalar::Util qw( blessed ); # From BBv2 P-22.2.1 our %names = ( CCNCO => '1,3-oxazolidine', CCCNO => '1,2-oxazolidine', CCCCN => 'pyrrolidine', CCCNN => 'pyrazolidine', CCNCN => 'imidazolidine', CCCCCN => 'piperidine', CCNCCN => 'piperazine', CCNCCO => 'morpholine', 'CCNCCS' => 'thiomorpholine', 'CCNCC[Se]' => 'selenomorpholine', 'CCNCC[Te]' => 'telluromorpholine', # 5-membered aromatic (monoheteroatoms are handled elsewhere) 'C=CN=CN' => 'imidazole', 'C=NCCN' => 'imidazole', # 4,5-dihydro-1H-imidazole 'C=CN=CO' => '1,3-oxazole', 'C=CC=NO' => '1,2-oxazole', 'C=CC=NN' => 'pyrazole', # FIXME: Adjust for isomerism 'C=CC=C[Se]' => 'selenophene', 'C=CC=C[Te]' => 'tellurophene', # 6-membered aromatic 'c:c:c:c:c:c:' => 'benzene', 'C=CC=CCO' => 'pyran', 'C=CC=CCS' => 'thiopyran', 'C=CC=CC[Se]' => 'selenopyran', 'C=CC=CC[Te]' => 'telluropyran', 'c:c:n:c:c:n:' => 'pyrazine', 'c:c:c:c:n:n:' => 'pyridazine', 'c:c:c:c:c:n:' => 'pyridine', 'c:c:c:n:c:n:' => 'pyrimidine', # Various cases of aromatisation 'C=CCC=CN' => 'pyridine', 'C=CC=CCN' => 'pyridine', ); my %five_membered_aromatic_single_heteroatom = ( N => 'pyrrole', O => 'furan', S => 'thiophene', ); sub new { my( $class, $graph, @vertices ) = @_; my $self = bless { graph => $graph, vertices => [ @vertices ] }, $class; return $self if $self->is_homogeneous; # TODO: This code is not optimal, but works my( $senior_heteroatom ) = sort { $elements{$a}->{seniority} <=> $elements{$b}->{seniority} } grep { $_ ne 'C' } map { ChemOnomatopist::element( $_ ) } @vertices; return $self unless $senior_heteroatom; my @chains; for (circle_permutations( @vertices )) { next unless ChemOnomatopist::element( $_->[0] ) eq $senior_heteroatom; push @chains, ChemOnomatopist::Chain::Circular->new( $graph, @$_ ); } my( $first ) = sort { _cmp( $a, $b ) } @chains; return bless { graph => $graph, vertices => [ $first->vertices ] }, $class; } sub candidates() { my( $self ) = @_; my $graph = $self->graph; my @vertices = $self->vertices; my $parent = $self->parent; my( $senior_heteroatom ) = $self->heteroatoms; my @chains; for (0..$#vertices) { push @chains, bless { graph => $graph, vertices => [ @vertices ], parent => $parent }, ChemOnomatopist::Chain::Monocycle:: if !$senior_heteroatom || ChemOnomatopist::element( $vertices[0] ) eq $senior_heteroatom; push @vertices, shift @vertices; } @vertices = reverse @vertices; for (0..$#vertices) { push @chains, bless { graph => $graph, vertices => [ @vertices ], parent => $parent }, ChemOnomatopist::Chain::Monocycle:: if !$senior_heteroatom || ChemOnomatopist::element( $vertices[0] ) eq $senior_heteroatom; push @vertices, shift @vertices; } my( $max_value ) = sort { _cmp( $a, $b ) } @chains; @chains = grep { !_cmp( $_, $max_value ) } @chains; for (@chains) { $_->{candidate_for} = $self; } return @chains; } sub autosymmetric_equivalents() { my( $self ) = @_; my $cycle = $self->graph->subgraph( [ $self->vertices ] ); # TODO: Add attributes my @chains = map { ChemOnomatopist::Chain::Circular->new( $cycle, @$_ ) } circle_permutations( $self->vertices ); # CHECKME: Additional rules from ChemOnomatopist::filter_chains() might still be needed @chains = sort { ChemOnomatopist::Chain::Monocycle::_cmp( $a, $b ) } @chains; @chains = grep { !ChemOnomatopist::Chain::Monocycle::_cmp( $_, $chains[0] ) } @chains; @chains = map { bless { graph => $self->graph, vertices => [ $_->vertices ] } } @chains; return ChemOnomatopist::rule_lowest_numbered_locants( @chains ); } sub parent(;$) { my( $self, $parent ) = @_; my $old_parent = $self->SUPER::parent( $parent ); return $old_parent unless $parent; return $old_parent if $old_parent && $parent == $old_parent; # Addition/change of parent may need resetting the numbering my @candidates = $self->candidates; @candidates = rule_lowest_parent_locant( @candidates ) if defined $self->parent_locant; @candidates = ChemOnomatopist::filter_chains( @candidates ); $self->{vertices} = [ $candidates[0]->vertices ]; return $old_parent; } sub needs_heteroatom_locants() { my( $self ) = @_; return '' if $self->is_hydrocarbon; return '' if $self->is_monoreplaced && !$self->is_substituted; return $self->length < 3 || $self->length > 10 || all { ChemOnomatopist::element( $_ ) ne 'C' } $self->vertices; } sub needs_heteroatom_names() { my( $self ) = @_; return '' if $self->is_hydrocarbon; return $self->length < 3 || $self->length > 10 || all { ChemOnomatopist::element( $_ ) ne 'C' } $self->vertices; } sub needs_indicated_hydrogens() { my( $self ) = @_; return '' if $self->is_hydrocarbon && $self->length == 6; # BBv3 P-31.2.3.1 return '' if $self->is_saturated; # BBv3 P-31.2.3.2 return '' unless $self->is_Hantzsch_Widman; return 1; } sub prefix() { my( $self ) = @_; my $parent = $self->parent; my $name = $self->suffix; if( $name eq 'benzene' ) { if( $parent && blessed $parent && ( $parent->isa( ChemOnomatopist::Group::Sulfinyl:: ) || $parent->isa( ChemOnomatopist::Group::Sulfonyl:: ) ) ) { # Rule derived from examples in BBv2 P-63.6 return $name; } return ChemOnomatopist::Name->new( 'phenyl' ); } $name = ChemOnomatopist::Name->new( $name ) unless blessed $name; $name->pop_e; pop @$name if $name->ends_with_alkane_an_suffix; if( $parent && !$self->is_homogeneous ) { # FIXME: Order of vertices seems to be established regardless of the attachments. # This causes ambiguity, for example, in 1-(1,4-diazepan-1-ylsulfonyl)-8-methylisoquinoline my @vertices = $self->vertices; my $position = first { $self->graph->has_edge( $parent, $vertices[$_] ) } 0..$#vertices; die "unknown locant in multicyclic compound\n" unless defined $position; $name->append_substituent_locant( $self->locants( $position ) ); } $name .= 'yl'; return $name; } sub suffix() { my( $self ) = @_; my $graph = $self->graph; my $SMILES = $self->backbone_SMILES; # Check the preserved names if( $self->length == 5 && $self->number_of_double_bonds && exists $five_membered_aromatic_single_heteroatom{join( '', $self->heteroatoms )} ) { return ChemOnomatopist::Name->new( $five_membered_aromatic_single_heteroatom{join( '', $self->heteroatoms )} ); } return ChemOnomatopist::Name->new( $names{$SMILES} ) if exists $names{$SMILES}; # Check for aromatic notation if( $SMILES =~ /:/ ) { $SMILES =~ s/([a-z]):/'' . uc( $1 )/ge; $SMILES =~ s/\[([a-z]{1,2})\]:/'[' . uc( $1 ) . ']'/ge; for my $SMILES_for_name (keys %names) { next unless $SMILES_for_name =~ /=/; my $name = $names{$SMILES_for_name}; $SMILES_for_name =~ s/=//g; return ChemOnomatopist::Name->new( $name ) if $SMILES eq $SMILES_for_name; } } # Check for annulenes if( $self->is_hydrocarbon && $self->is_aromatic && $self->length =~ /^(4|6|8|10|12|14|16)$/ ) { return ChemOnomatopist::Name->new( 'cyclo' . ChemOnomatopist::IUPAC_numerical_multiplier( $self->length, 1 ) . ChemOnomatopist::IUPAC_numerical_multiplier( $self->length / 2, 1 ) . 'ene' ); } # Check for cycloalkanes if( $self->is_hydrocarbon ) { my $name = ChemOnomatopist::Name::Part::NondetachablePrefix->new( 'cyclo' )->to_name; $name .= $self->SUPER::suffix; return $name; } if( $self->is_Hantzsch_Widman ) { # Hantzsch-Widman names (BBv2 P-22.2.2.1) # Collect the types of heteroatoms and their attachment positions my %heteroatoms; my @vertices = $self->vertices; for my $i (0..$#vertices) { my $symbol = ChemOnomatopist::element( $vertices[$i] ); next if $symbol eq 'C'; $heteroatoms{$symbol} = [] unless $heteroatoms{$symbol}; push @{$heteroatoms{$symbol}}, $i; } my $least_senior_element; my @heteroatom_locants; for my $element (sort { $elements{$a}->{seniority} <=> $elements{$b}->{seniority} } keys %heteroatoms) { push @heteroatom_locants, @{$heteroatoms{$element}}; $least_senior_element = $element; } my $name = ChemOnomatopist::Name->new; unless( @heteroatom_locants == 1 || (scalar keys %heteroatoms == 1 && @heteroatom_locants == $self->length - 1) ) { # Locants are omitted according to BBv2 P-22.2.2.1.7 $name->append_locants( map { $_ + 1 } @heteroatom_locants ); } for my $element (sort { $elements{$a}->{seniority} <=> $elements{$b}->{seniority} } keys %heteroatoms) { if( @{$heteroatoms{$element}} > 1 ) { $name->append_multiplier( ChemOnomatopist::IUPAC_numerical_multiplier( scalar @{$heteroatoms{$element}} ) ); } $name->append_element( exists $elements{$element}->{HantzschWidman} ? $elements{$element}->{HantzschWidman} : $elements{$element}->{prefix} ); } $name->[-1] =~ s/a$//; if( $self->length <= 5 ) { my @stems = ( 'ir', 'et', 'ol' ); $name->append_stem( $stems[$self->length - 3] ); if( $self->is_saturated ) { $name .= $heteroatoms{N} ? 'idine' : 'ane'; } elsif( $self->length == 3 ) { $name .= $heteroatoms{N} ? 'ene' : 'ine'; } else { $name .= 'e'; } return $name; } elsif( $self->length == 6 ) { if( ($elements{$least_senior_element}->{seniority} >= 5 && $elements{$least_senior_element}->{seniority} <= 8) || $least_senior_element eq 'Bi' ) { $name .= $self->is_saturated ? 'ane' : 'ine'; } elsif( ($elements{$least_senior_element}->{seniority} >= 16 && $elements{$least_senior_element}->{seniority} <= 19) || $least_senior_element eq 'N' ) { $name .= $self->is_saturated ? 'inane' : 'ine'; } else { $name .= $self->is_saturated ? 'inane' : 'inine'; } return $name; } elsif( $self->length >= 7 ) { my @stems = ( 'ep', 'oc', 'on', 'ec' ); $name->append_stem( $stems[$self->length - 7] ); $name .= $self->is_saturated ? 'ane' : 'ine'; return $name; } } my $name = ChemOnomatopist::Name->new( 'cyclo' ); $name .= $self->SUPER::suffix; return $name; } sub rule_lowest_parent_locant { all_min { $_->parent_locant } @_ } # FIXME: Pay attention to bond orders # Orders the atoms by their seniority and compares the resulting locants sub _cmp { my( $A, $B ) = @_; my @A_positions = $A->heteroatom_positions; my @B_positions = $B->heteroatom_positions; return cmp_arrays( \@A_positions, \@B_positions ) if cmp_arrays( \@A_positions, \@B_positions ); my @A_heteroatoms = $A->heteroatoms; my @B_heteroatoms = $B->heteroatoms; @A_positions = map { $A_positions[$_] } sort { $elements{$A_heteroatoms[$a]}->{seniority} <=> $elements{$A_heteroatoms[$b]}->{seniority} } 0..$#A_positions; @B_positions = map { $B_positions[$_] } sort { $elements{$B_heteroatoms[$a]}->{seniority} <=> $elements{$B_heteroatoms[$b]}->{seniority} } 0..$#B_positions; return cmp_arrays( \@A_positions, \@B_positions ); } 1; ChemOnomatopist-0.10.0/lib/ChemOnomatopist/Chain/Monocycle/000077500000000000000000000000001463750375500235515ustar00rootroot00000000000000ChemOnomatopist-0.10.0/lib/ChemOnomatopist/Chain/Monocycle/Fused.pm000066400000000000000000000025451463750375500251630ustar00rootroot00000000000000package ChemOnomatopist::Chain::Monocycle::Fused; use strict; use warnings; # ABSTRACT: Monocyclic group which is fused to some other cycle # VERSION use parent ChemOnomatopist::Chain::Monocycle::; use ChemOnomatopist; use ChemOnomatopist::Util::SMILES qw( cycle_SMILES ); use List::Util qw( all ); sub new { my( $class, $graph, $system, @vertices ) = @_; return bless { graph => $graph, system => $system, vertices => \@vertices }, $class; } sub system() { my( $self ) = @_; return $self->{system}; } sub backbone_SMILES() { my( $self ) = @_; return cycle_SMILES( $self->graph, $self->vertices ); } # Returns a copy of the monocycle flipped around the bridge sub flipped() { my( $self ) = @_; my @vertices = $self->vertices; my @bridge = splice @vertices, -2; my @flipped = ( reverse( @vertices ), reverse( @bridge ) ); return ChemOnomatopist::Chain::Monocycle::Fused->new( $self->graph, $self->system, @flipped ); } # Find the best orientation between self and flipped self. # Return 1 if flipped. sub orient() { my( $self ) = @_; my( $chain ) = ChemOnomatopist::filter_chains( $self, $self->flipped ); return '' if $chain == $self; $self->{vertices} = $chain->{vertices}; return 1; } 1; ChemOnomatopist-0.10.0/lib/ChemOnomatopist/Chain/Monospiro.pm000066400000000000000000000100131463750375500241370ustar00rootroot00000000000000package ChemOnomatopist::Chain::Monospiro; # ABSTRACT: Monospiro compound # VERSION use strict; use warnings; use parent ChemOnomatopist::Chain::Circular::; use ChemOnomatopist; use Graph::Traversal::DFS; sub new { my( $class, $graph, @vertices ) = @_; my $subgraph = $graph->subgraph( \@vertices ); my( $spiro_atom ) = grep { $subgraph->degree( $_ ) == 4 } @vertices; $subgraph->delete_vertex( $spiro_atom ); # Graph is broken into components. # Each component is represented as an array of vertices in the order of traverse. my @components; for my $component (sort { @$a <=> @$b } $subgraph->connected_components) { my( $start ) = sort { $subgraph->degree( $a ) <=> $subgraph->degree( $b ) } @$component; push @components, [ Graph::Traversal::DFS->new( $subgraph, start => $start )->dfs ]; } return bless { graph => $graph, spiro_atom => $spiro_atom, components => \@components }, $class; } sub candidates() { my( $self ) = @_; # "Numbering starts in the smaller ring, if one is smaller, at a ring atom next to the spiro atom and proceeds first around that ring, then through the spiro atom and around the second ring." my $graph = $self->graph; my $spiro_atom = $self->{spiro_atom}; my( $A, $B ) = $self->components; my @candidates; push @candidates, $self, bless( { graph => $graph, spiro_atom => $spiro_atom, components => [ $A, [ reverse @$B ] ], candidate_for => $self } ), bless( { graph => $graph, spiro_atom => $spiro_atom, components => [ [ reverse @$A ], $B ], candidate_for => $self } ), bless( { graph => $graph, spiro_atom => $spiro_atom, components => [ [ reverse @$A ], [ reverse @$B ] ], candidate_for => $self } ); if( @$A == @$B ) { push @candidates, bless( { graph => $graph, spiro_atom => $spiro_atom, components => [ $B, $A ], candidate_for => $self } ), bless( { graph => $graph, spiro_atom => $spiro_atom, components => [ $B, [ reverse @$A ] ], candidate_for => $self } ), bless( { graph => $graph, spiro_atom => $spiro_atom, components => [ [ reverse @$B ], $A ], candidate_for => $self } ), bless( { graph => $graph, spiro_atom => $spiro_atom, components => [ [ reverse @$B ], [ reverse @$A ] ], candidate_for => $self } ); } return @candidates; } sub components() { my( $self ) = @_; return @{$self->{components}}; } sub vertices() { my( $self ) = @_; my( $A, $B ) = $self->components; my @vertices = ( @$A, $self->{spiro_atom}, @$B ); return @vertices; } sub has_form($$) { my( $class, $graph ) = @_; my %degrees = map { $graph->degree( $_ ) => 1 } $graph->vertices; return '' unless join( ',', sort keys %degrees ) eq '2,4'; my @d4 = grep { $graph->degree( $_ ) == 4 } $graph->vertices; return '' unless @d4 == 1; return 1; } sub needs_charge_locants() { 1 } sub needs_isotope_locants() { 1 } sub needs_multiple_bond_locants() { 1 } sub needs_substituent_locants() { 1 } sub locants() { shift; return map { $_ + 1 } @_ } sub number_of_rings() { 2 } sub prefix() { my( $self ) = @_; my $name = ChemOnomatopist::Name->new( 'spiro' ); $name .= ChemOnomatopist::Name::Part::Fusion->new( '[' . join( '.', map { scalar @$_ } $self->components ) . ']' ); $name .= ChemOnomatopist::alkane_chain_name( $self->length ) . 'an'; if( $self->parent ) { my @vertices = $self->vertices; my( $position ) = grep { $self->graph->has_edge( $self->parent, $vertices[$_] ) } 0..$#vertices; die "unknown locant in multicyclic compound\n" unless defined $position; $name->append_substituent_locant( $self->locants( $position ) ); } $name .= 'yl'; return $name; } sub suffix() { my( $self ) = @_; my $name = ChemOnomatopist::Name->new( 'spiro' ); $name .= ChemOnomatopist::Name::Part::Fusion->new( '[' . join( '.', map { scalar @$_ } $self->components ) . ']' ); $name .= $self->SUPER::suffix; return $name; } 1; ChemOnomatopist-0.10.0/lib/ChemOnomatopist/Chain/Phenanthrene.pm000066400000000000000000000076271463750375500246120ustar00rootroot00000000000000package ChemOnomatopist::Chain::Phenanthrene; # ABSTRACT: Phenanthrene or its derivative # VERSION use strict; use warnings; use ChemOnomatopist::Chain::Polyaphene; use ChemOnomatopist::Name; use ChemOnomatopist::Util::Graph qw( merge_graphs ); use Graph::Undirected; use List::Util qw( first all any ); use parent ChemOnomatopist::Chain::Polyaphene::; sub new { my( $class, $graph, @cycles ) = @_; my $subgraph = $graph->subgraph( map { $_->vertices } @cycles ); # Deleting all edges having degree 3 vertices at both ends $subgraph->delete_edges( map { @$_ } grep { $subgraph->degree( $_->[0] ) == 3 && $subgraph->degree( $_->[1] ) == 3 } $subgraph->edges ); # Find an order my $start = first { $subgraph->degree( $_ ) == 1 } $subgraph->vertices; my @vertices = Graph::Traversal::DFS->new( $subgraph, start => $start )->dfs; # Adjust the order if( any { ChemOnomatopist::element( $_ ) eq 'N' } @vertices ) { # Find the order so as N is closest to the begining of the chain # CHECKME: This might not be correct due to offset my $first = first { ChemOnomatopist::element( $vertices[$_] ) eq 'N' } 0..$#vertices; my $last = first { ChemOnomatopist::element( $vertices[-1-$_] ) eq 'N' } 0..$#vertices; @vertices = reverse @vertices if $last < $first; push @vertices, shift @vertices; # Phenanthridine has a strict order if( (grep { ChemOnomatopist::element( $_ ) eq 'N' } @vertices) == 1 && ChemOnomatopist::element( $vertices[5] ) ne 'N' ) { die "cannot handle complicated cyclic compounds\n"; } } else { for (1..5) { push @vertices, shift @vertices; } @vertices = reverse @vertices; } return bless { graph => $graph, vertices => \@vertices }, $class; } sub candidates() { my( $self ) = @_; my @candidates = ( $self ); if( $self->is_hydrocarbon ) { my @vertices = $self->vertices; push @candidates, bless { graph => $self->graph, vertices => [ reverse map { $vertices[$_] } ( 10..13, 0..9 ) ], candidate_for => $self }; } return @candidates; } sub ideal_graph($) { my( $class ) = @_; my @graphs; for (0..1) { my $graph = Graph::Undirected->new( refvertexed => 1 ); $graph->add_cycle( map { { symbol => 'C', number => $_-1 } } 1..6 ); push @graphs, $graph; } my $graph = merge_graphs( @graphs ); # Pick an edge from each graph my( $A ) = $graphs[0]->edges; my( $B ) = $graphs[1]->edges; # Join a pair of atoms with an edge $graph->add_edge( $A->[0], $B->[0] ); # Add a longer arc between other two atoms $graph->add_path( $A->[1], { symbol => 'C' }, { symbol => 'C' }, $B->[1] ); return $graph; } sub needs_heteroatom_locants() { my( $self ) = @_; return $self->number_of_heteroatoms == 2; } sub needs_heteroatom_names() { '' } sub prefix() { my( $self ) = @_; # TODO: Check the heteroatom sites my $heteroatom_positions = join ',', $self->heteroatom_positions; if( all { $_ eq 'N' } $self->heteroatoms ) { return ChemOnomatopist::Name->new( 'phenanthridine' ) if $self->number_of_heteroatoms == 1; return ChemOnomatopist::Name->new( 'phenanthroline' ) if $self->number_of_heteroatoms == 2; } if( $self->number_of_heteroatoms == 1 ) { return ChemOnomatopist::Name->new( 'arsanthridine' ) if all { $_ eq 'As' } $self->heteroatoms; return ChemOnomatopist::Name->new( 'phosphanthridine' ) if all { $_ eq 'P' } $self->heteroatoms; } return ChemOnomatopist::Name->new( 'phenanthrene' ) if $self->is_hydrocarbon; die "unknown phenanthrene derivative\n"; } sub suffix() { $_[0]->prefix } 1; ChemOnomatopist-0.10.0/lib/ChemOnomatopist/Chain/Picene.pm000066400000000000000000000052261463750375500233670ustar00rootroot00000000000000package ChemOnomatopist::Chain::Picene; # ABSTRACT: Picene chain # VERSION use strict; use warnings; use parent ChemOnomatopist::Chain::Circular::; use ChemOnomatopist::Name; use ChemOnomatopist::Util::Graph qw( graph_without_edge_attributes ); use Graph::Nauty qw( are_isomorphic ); use Graph::Traversal::DFS; use Graph::Undirected; use List::Util qw( first ); use Set::Object qw( set ); sub new { my( $class, $graph, @cycles ) = @_; my $subgraph = $graph->subgraph( map { $_->vertices } @cycles ); # Isolating a path of vertices with degree of 3 my $subgraph_d3 = $subgraph->subgraph( grep { $subgraph->degree( $_ ) == 3 } $subgraph->vertices ); my $d3_start = first { $subgraph_d3->degree( $_ ) == 1 } $subgraph_d3->vertices; my @d3_path = reverse Graph::Traversal::DFS->new( $subgraph_d3, start => $d3_start )->dfs; my $first = first { !set( @d3_path )->has( $_ ) } $subgraph->neighbours( $d3_path[1] ); $subgraph->delete_edge( $first, $d3_path[1] ); # Taking pairs of vertices along the path, deleting edges between them while( @d3_path ) { $subgraph->delete_edge( shift @d3_path, shift @d3_path ); } my @vertices = reverse Graph::Traversal::DFS->new( $subgraph, start => $first )->dfs; return bless { graph => $graph, vertices => \@vertices }, $class; } sub candidates() { my( $self ) = @_; my @candidates = ( $self ); my @vertices = reverse $self->vertices; for (1..6) { push @vertices, shift @vertices; } push @candidates, bless { graph => $self->graph, vertices => \@vertices, candidate_for => $self }; return @candidates; } sub has_form($$) { my( $class, $graph ) = @_; my @vertices = $graph->vertices; return '' unless @vertices == 22; return are_isomorphic( graph_without_edge_attributes( $graph ), $class->ideal_graph, sub { ChemOnomatopist::element( $_[0] ) } ); } sub ideal_graph($) { my( $class ) = @_; my $graph = Graph::Undirected->new( refvertexed => 1 ); my @vertices = map { { symbol => 'C' } } 1..8; $graph->add_path( @vertices ); $graph->add_cycle( @vertices[0..1], map { { symbol => 'C' } } 1..4 ); $graph->add_cycle( @vertices[6..7], map { { symbol => 'C' } } 1..4 ); for (0..2) { $graph->add_cycle( @vertices[0+(2*$_)..3+(2*$_)], map { { symbol => 'C' } } 1..2 ); } return $graph; } sub number_of_rings() { 5 } sub prefix() { ChemOnomatopist::Name->new( 'picene' ) } sub suffix() { $_[0]->prefix } 1; ChemOnomatopist-0.10.0/lib/ChemOnomatopist/Chain/Polyacene.pm000066400000000000000000000113201463750375500240730ustar00rootroot00000000000000package ChemOnomatopist::Chain::Polyacene; # ABSTRACT: Polyacenes, including anthracene # VERSION use strict; use warnings; use parent ChemOnomatopist::Chain::Circular::; use ChemOnomatopist::Name; use ChemOnomatopist::Util::Graph qw( graph_without_edge_attributes subgraph ); use Graph::Nauty qw( are_isomorphic ); use Graph::Undirected; use List::Util qw( first ); use Set::Object qw( set ); sub new { my( $class, $graph, @cycles ) = @_; my @vertices = map { $_->vertices } @cycles; my $subgraph = subgraph( $graph, @vertices ); $subgraph->delete_vertices( grep { $subgraph->degree( $_ ) == 3 } $subgraph->vertices ); $subgraph->delete_vertices( grep { !$subgraph->degree( $_ ) } $subgraph->vertices ); my $start = first { $subgraph->degree( $_ ) == 1 } $subgraph->vertices; $subgraph = subgraph( $graph, @vertices ); # Restore the subgraph my $last = first { $subgraph->degree( $_ ) == 3 } $subgraph->neighbours( $start ); $subgraph->delete_edges( $start, $last, map { @$_ } grep { $subgraph->degree( $_->[0] ) == 3 && $subgraph->degree( $_->[1] ) == 3 } $subgraph->edges ); @vertices = reverse Graph::Traversal::DFS->new( $subgraph, start => $start )->dfs; return bless { graph => $graph, vertices => \@vertices }, $class; } sub candidates() { my( $self ) = @_; my @candidates = ( $self, $self->flipped_vertically, $self->flipped_horizontally, $self->flipped_vertically->flipped_horizontally ); for (1..$#candidates) { $candidates[$_]->{candidate_for} = $self; } return @candidates; } sub flipped_horizontally() { my( $self ) = @_; my $subgraph = subgraph( $self->graph, $self->vertices ); my @chords = grep { $subgraph->degree( $_->[0] ) == 3 && $subgraph->degree( $_->[1] ) == 3 } $subgraph->edges; $subgraph->delete_edges( $self->{vertices}[$self->length / 2 + 3], $self->{vertices}[$self->length / 2 + 4], map { @$_ } @chords ); return bless { graph => $self->graph, vertices => [ reverse Graph::Traversal::DFS->new( $subgraph, start => $self->{vertices}[$self->length / 2 + 3] )->dfs ] }; } sub flipped_vertically() { my( $self ) = @_; my $subgraph = subgraph( $self->graph, $self->vertices ); my @chords = grep { $subgraph->degree( $_->[0] ) == 3 && $subgraph->degree( $_->[1] ) == 3 } $subgraph->edges; $subgraph->delete_edges( $self->{vertices}[3], $self->{vertices}[4], map { @$_ } @chords ); return bless { graph => $self->graph, vertices => [ reverse Graph::Traversal::DFS->new( $subgraph, start => $self->{vertices}[3] )->dfs ] }; } sub needs_indicated_hydrogens() { 1 } sub needs_substituent_locants() { 1 } sub has_form($$) { my( $class, $graph ) = @_; my %degrees = map { $graph->degree( $_ ) => 1 } $graph->vertices; return '' unless join( ',', sort keys %degrees ) eq '2,3'; my $N = scalar $graph->vertices; return '' if $N < 14; return '' if ($N - 14) % 4; return are_isomorphic( graph_without_edge_attributes( $graph ), $class->ideal_graph( $N ), sub { return 'C' } ); } sub ideal_graph($$) { my( $class, $N ) = @_; die "cannot construct polyacene with $N vertices\n" if $N < 10 || ($N-2) % 4; my $graph = Graph::Undirected->new( refvertexed => 1 ); my @vertices = map { { symbol => 'C', number => $_-1 } } 1..$N; $graph->add_cycle( @vertices ); for (0..($N-6) / 4 -1) { $graph->add_edge( map { $vertices[$_] } ( 4 + 2*$_, $N - 1 - 2*$_ ) ); } return $graph; } sub locants(@) { my $self = shift; my @locant_map; if( $self->length == 14 ) { @locant_map = ( 1..4, '4a', 10, '10a', 5..8, '8a', 9, '9a' ); } else { my $N = ($self->length - 2) / 4; @locant_map = ( 1..3, ( map { ($_ + 2, ($_ + 2) . 'a') } 2..$N ), ($N+3)..($N+5), ( map { ($_ + 9, ($_ + 9) . 'a') } 2..$N ) ); } return map { $locant_map[$_] } @_; } sub number_of_rings() { ($_[0]->length - 2) / 4 } sub suffix { my( $self ) = @_; return ChemOnomatopist::Name->new( 'anthracene' ) if $self->length == 14; return ChemOnomatopist::Name->new( ChemOnomatopist::IUPAC_numerical_multiplier( $self->number_of_rings ) . 'acene' ); } 1; ChemOnomatopist-0.10.0/lib/ChemOnomatopist/Chain/Polyaphene.pm000066400000000000000000000142041463750375500242640ustar00rootroot00000000000000package ChemOnomatopist::Chain::Polyaphene; use strict; use warnings; # ABSTRACT: Polyaphenes, including phenanthrene # VERSION use parent ChemOnomatopist::Chain::Circular::; use ChemOnomatopist::Util::Graph qw( graph_without_edge_attributes merge_graphs subgraph ); use Graph::Nauty qw( are_isomorphic ); use Graph::Undirected; use Set::Object qw( set ); sub new { my( $class, $graph, @cycles ) = @_; my $subgraph = subgraph( $graph, map { $_->vertices } @cycles ); # Constructing the connectivity graph for cycles my $connectivity_graph = Graph::Undirected->new( refvertexed => 1 ); for my $vertex ($subgraph->vertices) { next unless $subgraph->degree( $vertex ) == 3; $connectivity_graph->add_edge( grep { set( $_->vertices )->has( $vertex ) } @cycles ); } # Detecting the common ring which has three edges having degree 3 at their ends my $common_ring; for my $cycle (@cycles) { next unless scalar( grep { $subgraph->degree( $_->[0] ) == 3 && $subgraph->degree( $_->[1] ) == 3 } subgraph( $graph, $cycle->vertices )->edges ) == 3; $common_ring = $cycle; } # Finding the correct order of cycles, flipping if needed my $common_ring_pos = @cycles % 2 ? (@cycles - 1) / 2 : @cycles / 2 - 1; my( $start ) = grep { $connectivity_graph->degree( $_ ) == 1 } @cycles; my @cycles_in_order = Graph::Traversal::DFS->new( $connectivity_graph, start => $start )->dfs; if( !@cycles % 2 && $cycles_in_order[$common_ring_pos] != $common_ring ) { @cycles_in_order = reverse @cycles_in_order; } # Finding the atom in the common ring which will get the lowest number $subgraph = subgraph( $graph, $common_ring->vertices ); $subgraph->delete_edges( map { @$_ } map { subgraph( $graph, $_->vertices )->edges } ( $cycles_in_order[$common_ring_pos-1], $cycles_in_order[$common_ring_pos+1] ) ); my( $short_edge ) = grep { $subgraph->degree( $_->[0] ) == 1 && $subgraph->degree( $_->[1] ) == 1 } $subgraph->edges; my( $junction ) = (set( $cycles_in_order[$common_ring_pos-1]->vertices ) * set( @$short_edge ))->members; # Finding the candidates of the starting atom $subgraph = subgraph( $graph, $cycles_in_order[0]->vertices ); $subgraph->delete_vertices( $cycles_in_order[1]->vertices ); my @candidates = grep { $subgraph->degree( $_ ) == 1 } $subgraph->vertices; # Finding the first and the last atom in the enumeration order $subgraph = subgraph( $graph, map { $_->vertices } @cycles ); my $shortest_paths = $subgraph->single_source_shortest_paths( $junction ); my $min_length; my $first; for my $vertex (@candidates) { my $length = 0; my $v = $vertex; while( $shortest_paths->has_vertex_attribute( $v, 'p' ) ) { $v = $shortest_paths->get_vertex_attribute( $v, 'p' ); $length++; } if( !defined $min_length || $min_length > $length ) { $min_length = $length; $first = $vertex; } } my( $last ) = grep { $subgraph->degree( $_ ) == 3 } $subgraph->neighbours( $first ); # Deleting chords and connection between first and last atoms $subgraph->delete_edges( map { (set( $cycles_in_order[$_ ]->vertices ) * set( $cycles_in_order[$_+1]->vertices ))->members } 0..$#cycles-1 ); $subgraph->delete_edge( $first, $last ); my @vertices = Graph::Traversal::DFS->new( $subgraph, start => $last )->dfs; return bless { graph => $graph, vertices => \@vertices }, $class; } sub candidates() { my( $self ) = @_; my @candidates = ( $self ); if( (($self->length - 6) / 4) % 2 == 0 ) { my @vertices = reverse $self->vertices; my $N = ($self->length - 6) / 4 / 2; # Number of rings in one "line" for (1..($N-1)*4+2) { push @vertices, shift @vertices; } push @candidates, bless { graph => $self->graph, vertices => \@vertices, candidate_for => $self }; } return @candidates; } sub needs_substituent_locants() { 1 } sub has_form($$) { my( $class, $graph ) = @_; my %degrees = map { $graph->degree( $_ ) => 1 } $graph->vertices; return '' unless join( ',', sort keys %degrees ) eq '2,3'; my $N = scalar $graph->vertices; return '' if $N < 6 + 3 * 4; return '' if ($N - 6) % 4; return are_isomorphic( graph_without_edge_attributes( $graph ), $class->ideal_graph( $N ), sub { 'C' } ); } sub ideal_graph($$) { my( $class, $N ) = @_; my @sizes; if( ($N - 6) / 4 % 2 ) { # Two unequal branches @sizes = map { ( ( ($N - 6) / 4 - 1 ) / 2 + $_ ) * 4 + 2 } (0, 1); } else { # Two equal-sized branches @sizes = ( ($N - 2)/2, ($N - 2)/2 ); } my @graphs = map { ChemOnomatopist::Chain::Polyacene->ideal_graph( $_ ) } @sizes; my $graph = merge_graphs( @graphs ); # Locate the terminal edges my @termini; for my $g (@graphs) { $g->delete_vertices( map { $g->neighbours( $_ ) } grep { $g->degree( $_ ) == 3 } $g->vertices ); $g->delete_vertices( grep { $g->degree( $_ ) == 3 } $g->vertices ); my( $terminus ) = $g->edges; push @termini, $terminus; } # Create the common ring (BBv2 P-25.1.2.2) $graph->add_edge( map { $termini[$_]->[0] } (0, 1) ); $graph->add_path( $termini[0]->[1], { symbol => 'C' }, { symbol => 'C' }, $termini[1]->[1] ); return $graph; } sub number_of_rings() { ($_[0]->length - 2) / 4 } sub prefix() { my( $self ) = @_; return ChemOnomatopist::IUPAC_numerical_multiplier( $self->number_of_rings ) . 'aphene'; } sub suffix() { $_[0]->prefix } 1; ChemOnomatopist-0.10.0/lib/ChemOnomatopist/Chain/Polyhelicene.pm000066400000000000000000000060201463750375500245750ustar00rootroot00000000000000package ChemOnomatopist::Chain::Polyhelicene; # ABSTRACT: Polyhelicenes # VERSION use strict; use warnings; use parent ChemOnomatopist::Chain::Circular::; use ChemOnomatopist::Util::Graph qw( graph_without_edge_attributes subgraph ); use Graph::Nauty qw( are_isomorphic ); use Graph::Undirected; use List::Util qw( first ); use Set::Object qw( set ); sub new { my( $class, $graph, @cycles ) = @_; my @vertices = map { $_->vertices } @cycles; # Take a subgraph of only triple-connected vertices my $g3 = subgraph( $graph, @vertices ); # Leave only triple-connected vertices $g3->delete_vertices( grep { $g3->degree( $_ ) < 3 } $g3->vertices ); # There will be only two two-connected vertices now my $end = first { $g3->degree( $_ ) == 2 } $g3->vertices; # Take a subgraph of all participating vertices my $subgraph = subgraph( $graph, @vertices ); # Find the first vertex my $start = first { $subgraph->degree( $_ ) == 2 } $subgraph->neighbours( $end ); # Delete all bridges from the subgraph and the last edge as well $subgraph->delete_edges( $start, $end, map { @$_ } grep { $subgraph->degree( $_->[0] ) == 1 || $subgraph->degree( $_->[1] ) == 1 } $subgraph->edges ); @vertices = reverse( Graph::Traversal::DFS->new( $subgraph, start => $start )->dfs ); return bless { graph => $graph, vertices => \@vertices }, $class; } sub candidates() { my( $self ) = @_; my @candidates = ( $self ); my @vertices = reverse $self->vertices; for (1..5) { push @vertices, shift @vertices; } push @candidates, bless { graph => $self->graph, vertices => \@vertices, candidate_for => $self }; return @candidates; } sub needs_substituent_locants() { 1 } sub has_form($$) { my( $class, $graph ) = @_; my %degrees = map { $graph->degree( $_ ) => 1 } $graph->vertices; return '' unless join( ',', sort keys %degrees ) eq '2,3'; my $N = scalar $graph->vertices; return '' if ($N-2) % 4; return '' if ($N-2) / 4 < 6; return are_isomorphic( graph_without_edge_attributes( $graph ), $class->ideal_graph( $N ), sub { 'C' } ); } sub ideal_graph($$) { my( $class, $N ) = @_; die "cannot construct helicene with $N vertices\n" if ($N-2) % 4 || ($N-2) / 4 < 6; my $graph = Graph::Undirected->new( refvertexed => 1 ); $graph->add_cycle( map { { symbol => 'C' } } 1..6 ); my( $edge ) = $graph->edges; for (2..($N-2) / 4) { my @vertices = map { { symbol => 'C' } } 1..4; $graph->add_path( $edge->[0], @vertices, $edge->[1] ); $edge = [ @vertices[0..1] ]; } return $graph; } sub number_of_rings() { ($_[0]->length - 2) / 4 } sub suffix { my( $self ) = @_; return ChemOnomatopist::IUPAC_numerical_multiplier( $self->number_of_rings, 1 ) . 'helicene'; } 1; ChemOnomatopist-0.10.0/lib/ChemOnomatopist/Chain/Porphyrin.pm000066400000000000000000000027301463750375500241530ustar00rootroot00000000000000package ChemOnomatopist::Chain::Porphyrin; # ABSTRACT: Porphyrin compound # VERSION use strict; use warnings; use parent ChemOnomatopist::Chain::Circular::; use ChemOnomatopist::Name; use ChemOnomatopist::Util::Graph qw( graph_without_edge_attributes ); use Graph::Nauty qw( are_isomorphic ); use Graph::Undirected; sub new { my( $class, $graph, @vertices ) = @_; return bless { graph => $graph, vertices => \@vertices }, $class; } sub needs_heteroatom_locants() { '' } sub needs_heteroatom_names() { '' } sub has_form($$) { my( $class, $graph ) = @_; my @vertices = $graph->vertices; return '' unless @vertices == 24; return '' unless (grep { ChemOnomatopist::is_element( $_, 'C' ) } @vertices) == 20; return '' unless (grep { ChemOnomatopist::is_element( $_, 'N' ) } @vertices) == 4; return are_isomorphic( graph_without_edge_attributes( $graph ), $class->ideal_graph, sub { ChemOnomatopist::element( $_[0] ) } ); } sub ideal_graph($) { my( $class ) = @_; my $graph = Graph::Undirected->new( refvertexed => 1 ); my @vertices = map { { symbol => 'C' } } 1..20; $graph->add_cycle( @vertices ); for (0..3) { $graph->add_path( $vertices[$_ * 5], { symbol => 'N' }, $vertices[$_ * 5 + 3] ); } return $graph; } sub number_of_rings() { 5 } sub prefix() { ChemOnomatopist::Name->new( 'porphyrin' ) } sub suffix() { ChemOnomatopist::Name->new( 'porphyrin' ) } 1; ChemOnomatopist-0.10.0/lib/ChemOnomatopist/Chain/Sulfimide.pm000066400000000000000000000011231463750375500240750ustar00rootroot00000000000000package ChemOnomatopist::Chain::Sulfimide; # ABSTRACT: Sulfimide chain # VERSION use strict; use warnings; use ChemOnomatopist::Name; use parent ChemOnomatopist::Group::, ChemOnomatopist::Chain::; sub new { my( $class, $graph, @vertices ) = @_; return bless { graph => $graph, vertices => \@vertices }, $class; } sub needs_heteroatom_locants() { '' } sub needs_heteroatom_names() { '' } sub locants($@) { my $self = shift; map { $_ ? $self->{vertices}[1]->{symbol} : 'N' } @_; } sub suffix() { ChemOnomatopist::Name->new( 'sulfanimine' ) } # FIXME: Support Se and Te 1; ChemOnomatopist-0.10.0/lib/ChemOnomatopist/Chain/VonBaeyer.pm000066400000000000000000000126761463750375500240650ustar00rootroot00000000000000package ChemOnomatopist::Chain::VonBaeyer; # ABSTRACT: Von Baeyer hydrocarbon # VERSION use strict; use warnings; use ChemOnomatopist::Name; use ChemOnomatopist::Name::Part::Fusion; use Graph::Traversal::DFS; use List::Util qw( first sum0 ); use parent ChemOnomatopist::Chain::; sub new { my( $class, $graph, @vertices ) = @_; my $subgraph = $graph->subgraph( \@vertices ); my @d3 = grep { $subgraph->degree( $_ ) == 3 } @vertices; $subgraph->delete_vertices( @d3 ); # According to BBv3 P-23.2.1 and P-23.2.3, cycles should be ordered in decreasing size my @components = sort { @$b <=> @$a } $subgraph->connected_components; $subgraph = $graph->subgraph( \@vertices ); my $first_of_bridge = first { $subgraph->has_edge( $d3[0], $_ ) } @{$components[-1]}; my $last_of_bridge = first { $subgraph->has_edge( $d3[1], $_ ) } @{$components[-1]}; # Disconnect the main bridge $subgraph->delete_edge( $d3[0], $first_of_bridge ); $subgraph->delete_edge( $d3[1], $last_of_bridge ); # Find the last atom of the main ring my $last_of_the_main = first { $subgraph->has_edge( $d3[0], $_ ) } @{$components[1]}; $subgraph->delete_edge( $d3[0], $last_of_the_main ); # Disconnect # Connect in order to get the correct numbering $subgraph->add_edge( $last_of_the_main, $first_of_bridge ); @vertices = reverse Graph::Traversal::DFS->new( $subgraph, start => $d3[0] )->dfs; return bless { graph => $graph, vertices => \@vertices, sizes => [ map { scalar @$_ } @components ] }, $class; } sub candidates() { my( $self ) = @_; my @candidates = ( $self ); my @sizes = @{$self->{sizes}}; if( $sizes[0] == $sizes[1] && $sizes[0] == $sizes[2] ) { push @candidates, $self->cycles_swapped( 0, 1 ); push @candidates, $self->cycles_swapped( 0, 2 ); push @candidates, $self->cycles_swapped( 1, 2 ); push @candidates, $self->cycles_swapped( 0, 2 )->cycles_swapped( 0, 1 ); push @candidates, $self->cycles_swapped( 1, 2 )->cycles_swapped( 0, 1 ); } elsif( $sizes[0] == $sizes[1] ) { push @candidates, $self->cycles_swapped( 0, 1 ); } elsif( $sizes[1] == $sizes[2] ) { push @candidates, $self->cycles_swapped( 1, 2 ); } # Flip all the candidates for (0..$#candidates) { push @candidates, $candidates[$_]->flipped; } # Record the original chain for (1..$#candidates) { $candidates[$_]->{candidate_for} = $self; } return @candidates; } sub flipped() { my( $self ) = @_; my $graph = $self->graph; my @vertices = $self->vertices; my $subgraph = $graph->subgraph( \@vertices ); my @sizes = @{$self->{sizes}}; my @d3 = grep { $subgraph->degree( $_ ) == 3 } @vertices; @d3 = reverse @d3 unless $d3[0] == $vertices[0]; # CHECKME: Is this needed? my @vertices_now = ( $d3[1] ); shift @vertices; push @vertices_now, reverse splice @vertices, 0, $sizes[0]; shift @vertices; push @vertices_now, $d3[0]; push @vertices_now, reverse splice @vertices, 0, $sizes[1]; push @vertices_now, reverse splice @vertices, 0, $sizes[2]; return bless { graph => $graph, vertices => \@vertices_now, sizes => \@sizes }; } sub cycles_swapped($$) { my( $self, $A, $B ) = @_; my $graph = $self->graph; my @vertices = $self->vertices; my $subgraph = $graph->subgraph( \@vertices ); my @sizes = @{$self->{sizes}}; my @d2 = grep { $subgraph->degree( $_ ) == 2 } @vertices; my @d3 = grep { $subgraph->degree( $_ ) == 3 } @vertices; @d3 = reverse @d3 unless $d3[0] == $vertices[0]; # CHECKME: Is this needed? my @A = map { $d2[$_] } (sum0 @sizes[0..$A-1])..(sum0 @sizes[0..$A])-1; my @B = map { $d2[$_] } (sum0 @sizes[0..$B-1])..(sum0 @sizes[0..$B])-1; @vertices = @d2; splice @vertices, sum0( @sizes[0..$A-1] ), $sizes[$A], @B; splice @vertices, sum0( @sizes[0..$B-1] ), $sizes[$B], @A; splice @vertices, $sizes[0], 0, $d3[1]; unshift @vertices, $d3[0]; return bless { graph => $graph, vertices => \@vertices, sizes => \@sizes }; } sub has_form($$) { my( $class, $graph ) = @_; my @d2 = grep { $graph->degree( $_ ) == 2 } $graph->vertices; my @d3 = grep { $graph->degree( $_ ) == 3 } $graph->vertices; return '' unless @d3 == 2; return '' unless @d2 + @d3 == scalar $graph->vertices; return '' unless $graph->is_edge_connected; # Must not have bridges return '' if $graph->has_edge( @d3 ); # Reject regular bicycles $graph = $graph->copy->delete_vertices( @d3 ); return scalar( $graph->connected_components ) == 3; } sub prefix() { my( $self ) = @_; my $name = $self->suffix; if( $self->parent ) { # FIXME: Not stable for naphthalene my @vertices = $self->vertices; my( $position ) = grep { $self->graph->has_edge( $self->parent, $vertices[$_] ) } 0..$#vertices; die "unknown locant in multicyclic compound\n" unless defined $position; $name->pop_e; $name->append_substituent_locant( $self->locants( $position ) ); } return $name; } sub suffix() { my( $self ) = @_; return ChemOnomatopist::Name->new( 'bicyclo' ) . ChemOnomatopist::Name::Part::Fusion->new( '[' . join( '.', @{$self->{sizes}} ) . ']' ) . $self->SUPER::suffix; } 1; ChemOnomatopist-0.10.0/lib/ChemOnomatopist/Chain/Xanthene.pm000066400000000000000000000145641463750375500237430ustar00rootroot00000000000000package ChemOnomatopist::Chain::Xanthene; # ABSTRACT: Xanthene or its close derivative # VERSION use strict; use warnings; use parent ChemOnomatopist::Chain::Circular::; use ChemOnomatopist::Elements qw( %elements ); use ChemOnomatopist::Util::Graph qw( subgraph ); use List::Util qw( any first uniq ); use Set::Object qw( set ); sub new { my( $class, $graph, @cycles ) = @_; my @benzenes = grep { $_->is_benzene } @cycles; my $other = first { !$_->is_benzene } @cycles; # Safeguard against the other cycle not being in the middle of benzenes if( any { set( $_->vertices )->is_disjoint( set( $other->vertices ) ) } @benzenes ) { die "cannot name xanthene derivatives\n"; } # Find the correct vertex order my $subgraph = subgraph( $graph, map { $_->vertices } @cycles ); my @bridges = grep { $subgraph->degree( $_->[0] ) == 3 && $subgraph->degree( $_->[1] ) == 3 } $subgraph->edges; $subgraph->delete_edges( map { @$_ } @bridges ); my @heteroatom_positions = $other->heteroatom_positions; my @heteroatoms = $other->heteroatoms; if( uniq( @heteroatoms ) == 2 ) { @heteroatom_positions = reverse @heteroatom_positions if $heteroatoms[0] eq 'N'; @heteroatom_positions = reverse @heteroatom_positions if $heteroatoms[1] eq 'O'; @heteroatom_positions = reverse @heteroatom_positions if join( ',', @heteroatoms ) eq 'As,S'; } my @other_vertices = $other->vertices; $subgraph->delete_vertex( $other_vertices[($heteroatom_positions[0] + 2) % 6] ); my $start = first { $subgraph->has_vertex( $_ ) && $subgraph->degree( $_ ) == 1 } map { $_->vertices } @benzenes; my @vertices = ( reverse( Graph::Traversal::DFS->new( $subgraph, start => $start )->dfs ), $other_vertices[($heteroatom_positions[0] + 2) % 6] ); return bless { graph => $graph, vertices => \@vertices }, $class; } sub candidates() { my( $self ) = @_; my @candidates = ( $self, $self->flipped_horizontally ); if( $self->number_of_heteroatoms == 2 && uniq( $self->heteroatoms ) == 1 ) { push @candidates, $self->flipped_vertically, $self->flipped_vertically->flipped_horizontally; } for (1..$#candidates) { $candidates[$_]->{candidate_for} = $self; } return @candidates; } sub flipped_horizontally() { my( $self ) = @_; my @vertices = $self->vertices; return bless { graph => $self->graph, vertices => [ reverse @vertices[11..13], @vertices[0..10] ] }; } sub flipped_vertically() { my( $self ) = @_; my @vertices = reverse $self->vertices; for (1..4) { unshift @vertices, pop @vertices; } return bless { graph => $self->graph, vertices => \@vertices }; } sub locants(@) { my $self = shift; my @locant_map; if( $self->number_of_heteroatoms == 1 && join( '', $self->heteroatoms ) !~ /^(As|P)$/ ) { # Acridarsine and acridophosphine are numbered systematically @locant_map = ( 1..4, '4a', 10, '10a', 5..8, '8a', 9, '9a' ); } else { @locant_map = ( 1..4, '4a', 5, '5a', 6..9, '9a', 10, '10a' ); } return map { $locant_map[$_] } @_; } sub ideal_graph() { ChemOnomatopist::Chain::Polyacene->ideal_graph( 14 ) } sub needs_heteroatom_locants() { '' } sub needs_heteroatom_names() { '' } sub needs_substituent_locants() { 1 } sub number_of_rings() { 3 } sub prefix() { my( $self ) = @_; my $name = $self->suffix; $name->[-1]{value} =~ s/e$//; if( $self->parent ) { my @vertices = $self->vertices; my( $position ) = grep { $self->graph->has_edge( $self->parent, $vertices[$_] ) } 0..$#vertices; die "unknown locant in multicyclic compound\n" unless defined $position; $name->append_substituent_locant( $self->locants( $position ) ); } $name .= 'yl'; return $name; } sub suffix() { my( $self ) = @_; my @heteroatoms = $self->heteroatoms; if( @heteroatoms == 1 ) { if( $heteroatoms[0] eq 'N' ) { return ChemOnomatopist::Name::Part::Stem->new( 'acridine' )->to_name; } elsif( $heteroatoms[0] eq 'As' ) { return ChemOnomatopist::Name::Part::Stem->new( 'acridarsine' )->to_name; } elsif( $heteroatoms[0] eq 'P' ) { return ChemOnomatopist::Name::Part::Stem->new( 'acridophosphine' )->to_name; } my $name = ChemOnomatopist::Name::Part::Locants->new( '9H-' )->to_name; my $stem = ''; if( $heteroatoms[0] ne 'O' ) { $stem .= $elements{$heteroatoms[0]}->{prefix}; $stem =~ s/a$/o/; } $stem .= 'xanthene'; return $name->append_stem( $stem ); } elsif( @heteroatoms == 2 && $heteroatoms[0] eq $heteroatoms[1] && $heteroatoms[0] eq 'N' ) { return ChemOnomatopist::Name::Part::Stem->new( 'phenazine' )->to_name; } elsif( @heteroatoms == 2 && $heteroatoms[0] eq $heteroatoms[1] ) { my $name = $elements{$heteroatoms[0]}->{prefix}; $name =~ s/a$//; return ChemOnomatopist::Name::Part::Stem->new( $name . 'anthrene' )->to_name; } elsif( @heteroatoms == 2 && $heteroatoms[1] eq 'N' ) { # BBv2 P-25.2.2.3 my $name = ChemOnomatopist::Name::Part::Locants->new( '10H-' )->to_name; my $stem = 'pheno'; $stem =~ s/o$// if $elements{$heteroatoms[0]}->{prefix} =~ /^o/; $name->append_stem( $stem . $elements{$heteroatoms[0]}->{prefix} . 'zine' ); return $name; } elsif( @heteroatoms == 2 && $heteroatoms[0] eq 'O' ) { # BBv2 P-25.2.2.3 return 'phenoxathiine' if $heteroatoms[1] eq 'S'; if( any { $heteroatoms[1] eq $_ } qw( Se Te ) ) { my $stem = 'phenoxa' . $elements{$heteroatoms[1]}->{prefix}; $stem =~ s/a$/ine/; return ChemOnomatopist::Name::Part::Stem->new( $stem )->to_name; } elsif( any { $heteroatoms[1] eq $_ } qw( P As Sb ) ) { my $stem = 'phenoxa' . $elements{$heteroatoms[1]}->{prefix}; $stem =~ s/a$/inine/; return ChemOnomatopist::Name::Part::Stem->new( $stem )->to_name; } } elsif( @heteroatoms == 2 && $heteroatoms[0] eq 'S' && $heteroatoms[1] eq 'As' ) { # BBv2 P-25.2.2.3 return ChemOnomatopist::Name::Part::Stem->new( 'phenothiarsinine' )->to_name; } die "cannot name xanthene derivatives\n"; } 1; ChemOnomatopist-0.10.0/lib/ChemOnomatopist/ChainHalf.pm000066400000000000000000000025261463750375500227570ustar00rootroot00000000000000package ChemOnomatopist::ChainHalf; use strict; use warnings; use ChemOnomatopist::Chain; use parent ChemOnomatopist::Chain::; # ABSTRACT: Half of a longest chain # VERSION sub new { my( $class, $graph, $other_center, @vertices ) = @_; my $self = { vertices => \@vertices, graph => $graph, other_center => $other_center, cache => {} }; return bless $self, $class; } # Accessors # Groups are used to check which halves of a chain can be combined together. # If a graph contains single center, all halves will share the center. sub group() { my( $self ) = @_; return $self->{vertices}[1 - defined $self->{other_center}]; } sub _disconnected_chain_graph() { my( $self ) = @_; return $self->{_disconnected_chain_graph} if $self->{_disconnected_chain_graph}; my $graph = $self->graph->copy; my @vertices = $self->vertices; if( $self->{other_center} ) { # Cut the edge to the other center $graph->delete_edge( $vertices[0], $self->{other_center} ); } else { # Cut the edges to the other candidates for ($graph->neighbours( $vertices[0] )) { $graph->delete_edge( $vertices[0], $_ ); } } $graph->delete_path( @vertices ); $graph->delete_vertex( $self->parent ) if $self->parent; $self->{_disconnected_chain_graph} = $graph; return $graph; } 1; ChemOnomatopist-0.10.0/lib/ChemOnomatopist/Charge.pm000066400000000000000000000005771463750375500223370ustar00rootroot00000000000000package ChemOnomatopist::Charge; # ABSTRACT: Charged atom # VERSION use strict; use warnings; sub new { my( $class, $charge, $index, $locant ) = @_; return bless { charge => $charge, index => $index, locant => $locant }, $class; } sub charge() { $_[0]->{charge} } sub index() { $_[0]->{index} } sub locant() { $_[0]->{locant} } 1; ChemOnomatopist-0.10.0/lib/ChemOnomatopist/Comparable/000077500000000000000000000000001463750375500226445ustar00rootroot00000000000000ChemOnomatopist-0.10.0/lib/ChemOnomatopist/Comparable/Array/000077500000000000000000000000001463750375500237225ustar00rootroot00000000000000ChemOnomatopist-0.10.0/lib/ChemOnomatopist/Comparable/Array/Isotope/000077500000000000000000000000001463750375500253445ustar00rootroot00000000000000ChemOnomatopist-0.10.0/lib/ChemOnomatopist/Comparable/Array/Isotope/By/000077500000000000000000000000001463750375500257165ustar00rootroot00000000000000ChemOnomatopist-0.10.0/lib/ChemOnomatopist/Comparable/Array/Isotope/By/AtomicNumber.pm000066400000000000000000000013701463750375500306420ustar00rootroot00000000000000package ChemOnomatopist::Comparable::Array::Isotope::By::AtomicNumber; # ABSTRACT: Comparable array of isotopes # VERSION use strict; use warnings; use overload '<=>' => \&cmp; use ChemOnomatopist::Util qw( array_frequencies ); use List::Util qw( uniq ); sub new { my $class = shift; return bless \@_, $class; } sub cmp { my( $A, $B ) = @_; my %A_freq = array_frequencies map { $_->atomic_number } @$A; my %B_freq = array_frequencies map { $_->atomic_number } @$B; my @keys = (keys %A_freq, keys %B_freq); for (reverse sort uniq @keys) { return 1 if !exists $A_freq{$_}; return -1 if !exists $B_freq{$_}; return $A_freq{$_} <=> $B_freq{$_} if $A_freq{$_} <=> $B_freq{$_}; } return 0; } 1; ChemOnomatopist-0.10.0/lib/ChemOnomatopist/Comparable/Array/Isotope/By/MassNumber.pm000066400000000000000000000013621463750375500303320ustar00rootroot00000000000000package ChemOnomatopist::Comparable::Array::Isotope::By::MassNumber; # ABSTRACT: Comparable array of isotopes # VERSION use strict; use warnings; use overload '<=>' => \&cmp; use ChemOnomatopist::Util qw( array_frequencies ); use List::Util qw( uniq ); sub new { my $class = shift; return bless \@_, $class; } sub cmp { my( $A, $B ) = @_; my %A_freq = array_frequencies map { $_->mass_number } @$A; my %B_freq = array_frequencies map { $_->mass_number } @$B; my @keys = (keys %A_freq, keys %B_freq); for (reverse sort uniq @keys) { return 1 if !exists $A_freq{$_}; return -1 if !exists $B_freq{$_}; return $A_freq{$_} <=> $B_freq{$_} if $A_freq{$_} <=> $B_freq{$_}; } return 0; } 1; ChemOnomatopist-0.10.0/lib/ChemOnomatopist/Comparable/Array/Numeric.pm000066400000000000000000000004331463750375500256620ustar00rootroot00000000000000package ChemOnomatopist::Comparable::Array::Numeric; # ABSTRACT: Comparable array of numbers # VERSION use strict; use warnings; use ChemOnomatopist::Util qw( cmp_arrays ); use overload '<=>' => \&cmp_arrays; sub new { my $class = shift; return bless \@_, $class; } 1; ChemOnomatopist-0.10.0/lib/ChemOnomatopist/Elements.pm000066400000000000000000000241441463750375500227160ustar00rootroot00000000000000package ChemOnomatopist::Elements; # ABSTRACT: Element properties from IUPAC Blue Book # VERSION use strict; use warnings; use parent Exporter::; our @EXPORT_OK = qw( %elements ); # Replacement prefixes and seniorities are taken from BBv2 Appendix 1 our %elements = ( F => { prefix => 'fluora', seniority => 0, }, Cl => { prefix => 'chlora', seniority => 1, }, Br => { prefix => 'broma', seniority => 2, }, I => { prefix => 'ioda', seniority => 3, }, At => { prefix => 'astata', seniority => 4, }, O => { prefix => 'oxa', seniority => 5, }, S => { prefix => 'thia', seniority => 6, }, Se => { prefix => 'selena', seniority => 7, }, Te => { prefix => 'tellura', seniority => 8, }, Po => { prefix => 'polona', seniority => 9, }, Lv => { prefix => 'livermora', seniority => 10, }, N => { prefix => 'aza', seniority => 11, }, P => { prefix => 'phospha', seniority => 12, }, As => { prefix => 'arsa', seniority => 13, }, Sb => { prefix => 'stiba', seniority => 14, }, Bi => { prefix => 'bisma', seniority => 15, }, C => { prefix => 'carba', seniority => 16, }, Si => { prefix => 'sila', seniority => 17, }, Ge => { prefix => 'germa', seniority => 18, }, Sn => { prefix => 'stanna', seniority => 19, }, Pb => { prefix => 'plumba', seniority => 20, }, Fl => { prefix => 'flerova', seniority => 21, }, B => { prefix => 'bora', seniority => 22, }, Al => { prefix => 'alumina', seniority => 23, }, Ga => { prefix => 'galla', seniority => 24, }, In => { prefix => 'inda', seniority => 25, }, Tl => { prefix => 'thalla', seniority => 26, }, Zn => { prefix => 'zinca', seniority => 27, }, Cd => { prefix => 'cadma', seniority => 28, }, Hg => { prefix => 'mercura', seniority => 29, }, Cn => { prefix => 'copernica', seniority => 30, }, Cu => { prefix => 'cupra', seniority => 31, }, Ag => { prefix => 'argenta', seniority => 32, }, Au => { prefix => 'aura', seniority => 33, }, Rg => { prefix => 'roentgena', seniority => 34, }, Ni => { prefix => 'nickela', seniority => 35, }, Pd => { prefix => 'pallada', seniority => 36, }, Pt => { prefix => 'platina', seniority => 37, }, Ds => { prefix => 'darmstadta', seniority => 38, }, Co => { prefix => 'cobalta', seniority => 39, }, Rh => { prefix => 'rhoda', seniority => 40, }, Ir => { prefix => 'irida', seniority => 41, }, Mt => { prefix => 'meitnera', seniority => 42, }, Fe => { prefix => 'ferra', seniority => 43, }, Ru => { prefix => 'ruthena', seniority => 44, }, Os => { prefix => 'osma', seniority => 45, }, Hs => { prefix => 'hassa', seniority => 46, }, Mn => { prefix => 'mangana', seniority => 47, }, Tc => { prefix => 'techneta', seniority => 48, }, Re => { prefix => 'rhena', seniority => 49, }, Bh => { prefix => 'bohra', seniority => 50, }, Cr => { prefix => 'chroma', seniority => 51, }, Mo => { prefix => 'molybda', seniority => 52, }, W => { prefix => 'tungsta', seniority => 53, }, Sg => { prefix => 'seaborga', seniority => 54, }, V => { prefix => 'vanada', seniority => 55, }, Nb => { prefix => 'nioba', seniority => 56, }, Ta => { prefix => 'tantala', seniority => 57, }, Db => { prefix => 'dubna', seniority => 58, }, Ti => { prefix => 'titana', seniority => 59, }, Zr => { prefix => 'zircona', seniority => 60, }, Hf => { prefix => 'hafna', seniority => 61, }, Rf => { prefix => 'rutherforda', seniority => 62, }, Sc => { prefix => 'scanda', seniority => 63, }, Y => { prefix => 'yttra', seniority => 64, }, La => { prefix => 'lanthana', seniority => 65, }, Ce => { prefix => 'cera', seniority => 66, }, Pr => { prefix => 'praseodyma', seniority => 67, }, Nd => { prefix => 'neodyma', seniority => 68, }, Pm => { prefix => 'prometha', seniority => 69, }, Sm => { prefix => 'samara', seniority => 70, }, Eu => { prefix => 'europa', seniority => 71, }, Gd => { prefix => 'gadolina', seniority => 72, }, Tb => { prefix => 'terba', seniority => 73, }, Dy => { prefix => 'dysprosa', seniority => 74, }, Ho => { prefix => 'holma', seniority => 75, }, Er => { prefix => 'erba', seniority => 76, }, Tm => { prefix => 'thula', seniority => 77, }, Yb => { prefix => 'ytterba', seniority => 78, }, Lu => { prefix => 'luteta', seniority => 79, }, Ac => { prefix => 'actina', seniority => 80, }, Th => { prefix => 'thora', seniority => 81, }, Pa => { prefix => 'protactina', seniority => 82, }, U => { prefix => 'urana', seniority => 83, }, Np => { prefix => 'neptuna', seniority => 84, }, Pu => { prefix => 'plutona', seniority => 85, }, Am => { prefix => 'america', seniority => 86, }, Cm => { prefix => 'cura', seniority => 87, }, Bk => { prefix => 'berkela', seniority => 88, }, Cf => { prefix => 'californa', seniority => 89, }, Es => { prefix => 'einsteina', seniority => 90, }, Fm => { prefix => 'ferma', seniority => 91, }, Md => { prefix => 'mendeleva', seniority => 92, }, No => { prefix => 'nobela', seniority => 93, }, Lr => { prefix => 'lawrenca', seniority => 94, }, Be => { prefix => 'berylla', seniority => 95, }, Mg => { prefix => 'magnesa', seniority => 96, }, Ca => { prefix => 'calca', seniority => 97, }, Sr => { prefix => 'stronta', seniority => 98, }, Ba => { prefix => 'bara', seniority => 99, }, Ra => { prefix => 'rada', seniority => 100, }, Li => { prefix => 'litha', seniority => 101, }, Na => { prefix => 'soda', seniority => 102, }, K => { prefix => 'potassa', seniority => 103, }, Rb => { prefix => 'rubida', seniority => 104, }, Cs => { prefix => 'caesa', seniority => 105, }, Fr => { prefix => 'franca', seniority => 106, }, He => { prefix => 'hela', seniority => 107, }, Ne => { prefix => 'neona', seniority => 108, }, Ar => { prefix => 'argona', seniority => 109, }, Kr => { prefix => 'kryptona', seniority => 110, }, Xe => { prefix => 'xenona', seniority => 111, }, Rn => { prefix => 'radona', seniority => 112, }, ); # Hantzsch-Widman system prefixes, where different, taken from BBv2 P-22.2.2.1.1, Table 2.4 # Standard bonding numbers are taken from BBv2 P-14.1.2, Table 1.3 my %old_elements = ( B => { standard_bonding_number => 3, }, C => { standard_bonding_number => 4, }, N => { standard_bonding_number => 3, }, O => { standard_bonding_number => 2, }, F => { standard_bonding_number => 1, }, Al => { HantzschWidman => 'aluma', standard_bonding_number => 3, }, Si => { standard_bonding_number => 4, }, P => { standard_bonding_number => 3, }, S => { standard_bonding_number => 2, }, Cl => { standard_bonding_number => 1, }, Ga => { standard_bonding_number => 3, }, Ge => { standard_bonding_number => 4, }, As => { standard_bonding_number => 3, }, Se => { standard_bonding_number => 2, }, Br => { standard_bonding_number => 1, }, In => { HantzschWidman => 'indiga', standard_bonding_number => 3, }, Sn => { standard_bonding_number => 4, }, Sb => { standard_bonding_number => 3, }, Te => { standard_bonding_number => 2, }, I => { standard_bonding_number => 1, }, Tl => { standard_bonding_number => 3, }, Pb => { standard_bonding_number => 4, }, Bi => { standard_bonding_number => 3, }, Po => { standard_bonding_number => 2, }, At => { standard_bonding_number => 1, }, ); for my $element (keys %old_elements) { for my $key (qw( HantzschWidman standard_bonding_number )) { next unless exists $old_elements{$element}->{$key}; $elements{$element}->{$key} = $old_elements{$element}->{$key}; } } 1; ChemOnomatopist-0.10.0/lib/ChemOnomatopist/Grammar.pm000066400000000000000000000660451463750375500225360ustar00rootroot00000000000000package ChemOnomatopist::Grammar; # ABSTRACT: Grammar for chemical graphs # VERSION use strict; use warnings; use Algorithm::Combinatorics qw( combinations ); use ChemOnomatopist::Chain; use ChemOnomatopist::Chain::ABA; use ChemOnomatopist::Group::AcidHalide; use ChemOnomatopist::Group::Amidine; use ChemOnomatopist::Group::Carbaldehyde; use ChemOnomatopist::Group::Carbonitrile; use ChemOnomatopist::Chain::Carboxamide; use ChemOnomatopist::Chain::Chalcogen; use ChemOnomatopist::Chain::Circular; use ChemOnomatopist::Elements qw( %elements ); use ChemOnomatopist::Group::Ether; use ChemOnomatopist::Group::AcylHalide; use ChemOnomatopist::Group::Aldehyde; use ChemOnomatopist::Group::Amide; use ChemOnomatopist::Group::Amine; use ChemOnomatopist::Group::Carboxyl; use ChemOnomatopist::Group::Cyanide; use ChemOnomatopist::Group::Diazene; use ChemOnomatopist::Group::Hydroperoxide; use ChemOnomatopist::Group::Hydroxy; use ChemOnomatopist::Group::Imine; use ChemOnomatopist::Group::Isocyanate; use ChemOnomatopist::Group::Isocyanide; use ChemOnomatopist::Group::Ketone; use ChemOnomatopist::Group::Nitramide; use ChemOnomatopist::Group::Nitro; use ChemOnomatopist::Group::Nitroso; use ChemOnomatopist::Group::NoncarbonOxoacid; use ChemOnomatopist::Group::Peroxide; use ChemOnomatopist::Chain::Sulfimide; use ChemOnomatopist::Group::SulfinicAcid; use ChemOnomatopist::Group::Sulfinyl; use ChemOnomatopist::Group::SulfonicAcid; use ChemOnomatopist::Group::Sulfonyl; use ChemOnomatopist::Group::Urea; use ChemOnomatopist::Group::XO3; use Chemistry::OpenSMILES qw( is_double_bond ); use Graph::Grammar; use Graph::MoreUtils qw( graph_replace ); use List::Util qw( all any first sum ); use Scalar::Util qw( blessed ); use parent Exporter::; our @EXPORT_OK = qw( parse_molecular_graph ); sub is_nongroup { !$_[0]->groups( $_[1] ) } sub is_nongroup_atom { !blessed $_[1] && !$_[0]->groups( $_[1] ) && exists $_[1]->{symbol} } sub is_C { ChemOnomatopist::element( $_[1] ) && ChemOnomatopist::element( $_[1] ) eq 'C' } sub is_N { ChemOnomatopist::element( $_[1] ) && ChemOnomatopist::element( $_[1] ) eq 'N' } sub is_O { ChemOnomatopist::element( $_[1] ) && ChemOnomatopist::element( $_[1] ) eq 'O' } sub is_S { ChemOnomatopist::element( $_[1] ) && ChemOnomatopist::element( $_[1] ) eq 'S' } sub is_Se { ChemOnomatopist::element( $_[1] ) && ChemOnomatopist::element( $_[1] ) eq 'Se' } sub is_Te { ChemOnomatopist::element( $_[1] ) && ChemOnomatopist::element( $_[1] ) eq 'Te' } sub is_As_N_B_P_Se_Si_Sb_S_Te { ChemOnomatopist::element( $_[1] ) && ChemOnomatopist::element( $_[1] ) =~ /^(As|Br|Cl|F|I|Sb|Se|Si|Te|N|B|P|S)$/ } sub is_Br_Cl_F_I { ChemOnomatopist::element( $_[1] ) && ChemOnomatopist::element( $_[1] ) =~ /^(Br|Cl|F|I)$/ } sub is_Br_Cl_F_I_N { ChemOnomatopist::element( $_[1] ) && ChemOnomatopist::element( $_[1] ) =~ /^(Br|Cl|F|I|N)$/ } sub is_B_Cl_F_I { ChemOnomatopist::element( $_[1] ) && ChemOnomatopist::element( $_[1] ) =~ /^(B|Cl|F|I)$/ } sub is_S_Se_Te { ChemOnomatopist::element( $_[1] ) && ChemOnomatopist::element( $_[1] ) =~ /^(S|Se|Te)$/ } sub is_O_S_Se_Te { ChemOnomatopist::element( $_[1] ) && ChemOnomatopist::element( $_[1] ) =~ /^(O|S|Se|Te)$/ } sub is_C_N_O_S_Se_Te { ChemOnomatopist::element( $_[1] ) && ChemOnomatopist::element( $_[1] ) =~ /^(C|N|O|S|Se|Te)$/ } sub is_heteroatom { ChemOnomatopist::element( $_[1] ) && !&is_C } sub charge_plus_one { ChemOnomatopist::charge( $_[1] ) == 1 } sub charge_minus_one { ChemOnomatopist::charge( $_[1] ) == -1 } sub no_charge { !ChemOnomatopist::charge( $_[1] ) } sub has_H0 { !$_[1]->{hcount} } sub has_H1 { exists $_[1]->{hcount} && $_[1]->{hcount} == 1 } sub has_H2 { exists $_[1]->{hcount} && $_[1]->{hcount} == 2 } sub has_H3 { exists $_[1]->{hcount} && $_[1]->{hcount} == 3 } sub has_1_neighbour { $_[0]->degree( $_[1] ) == 1 } sub is_aldehyde { blessed $_[1] && $_[1]->isa( ChemOnomatopist::Group::Aldehyde:: ) } sub is_amide { blessed $_[1] && $_[1]->isa( ChemOnomatopist::Group::Amide:: ) } sub is_amine { blessed $_[1] && $_[1]->isa( ChemOnomatopist::Group::Amine:: ) } sub is_cyanide { blessed $_[1] && $_[1]->isa( ChemOnomatopist::Group::Cyanide:: ) } sub is_hydroxy { blessed $_[1] && $_[1]->isa( ChemOnomatopist::Group::Hydroxy:: ) } sub is_hydroperoxide { blessed $_[1] && $_[1]->isa( ChemOnomatopist::Group::Hydroperoxide:: ) } sub is_imine { blessed $_[1] && $_[1]->isa( ChemOnomatopist::Group::Imine:: ) } sub is_isocyanate { blessed $_[1] && $_[1]->isa( ChemOnomatopist::Group::Isocyanate:: ) } sub is_isocyanide { blessed $_[1] && $_[1]->isa( ChemOnomatopist::Group::Isocyanide:: ) } sub is_ketone { blessed $_[1] && $_[1]->isa( ChemOnomatopist::Group::Ketone:: ) } sub is_nitro { blessed $_[1] && $_[1]->isa( ChemOnomatopist::Group::Nitro:: ) } sub is_sulfinyl { blessed $_[1] && $_[1]->isa( ChemOnomatopist::Group::Sulfinyl:: ) } sub is_sulfonyl { blessed $_[1] && $_[1]->isa( ChemOnomatopist::Group::Sulfonyl:: ) } sub is_XO3 { blessed $_[1] && $_[1]->isa( ChemOnomatopist::Group::XO3:: ) } sub is_benzene { any { $_->isa( ChemOnomatopist::Chain::Monocycle:: ) && $_->is_benzene } $_[0]->groups( $_[1] ) } sub is_chalcogen { any { $_->isa( ChemOnomatopist::Chain::Chalcogen:: ) } $_[0]->groups( $_[1] ) } sub is_circular { any { $_->isa( ChemOnomatopist::Chain::Circular:: ) } $_[0]->groups( $_[1] ) } sub is_monocycle { any { $_->isa( ChemOnomatopist::Chain::Monocycle:: ) } $_[0]->groups( $_[1] ) } sub is_hydrazine { any { $_->isa( ChemOnomatopist::Group::Hydrazine:: ) } $_[0]->groups( $_[1] ) } sub is_ABA_chain { any { $_->isa( ChemOnomatopist::Chain::ABA:: ) } $_[0]->groups( $_[1] ) } sub looks_like_ABA_chain { my( $graph, $center ) = @_; # Require two neighbours my @neighbours = blessed $center ? $center->substituents : $graph->neighbours( $center ); return '' unless @neighbours == 2; if( all { blessed $_ && $_->isa( ChemOnomatopist::Chain::ABA:: ) } @neighbours ) { # ABA chains on both sides return '' unless $neighbours[0]->outer_element eq $neighbours[1]->outer_element; return '' unless $neighbours[0]->inner_element eq $neighbours[1]->inner_element; return '' unless $neighbours[0]->inner_element eq ChemOnomatopist::element( $center ); } elsif( any { blessed $_ && $_->isa( ChemOnomatopist::Chain::ABA:: ) } @neighbours ) { # ABA chain on one side @neighbours = reverse @neighbours if blessed $neighbours[1] && $neighbours[1]->isa( ChemOnomatopist::Chain::ABA:: ); return '' if any { blessed $_ } ( $center, $neighbours[1] ); return '' if $neighbours[0]->inner_element eq ChemOnomatopist::element( $center ); return '' if $neighbours[0]->outer_element eq ChemOnomatopist::element( $neighbours[1] ); } else { # No ABA chain yet return '' if any { blessed $_ } @neighbours; return '' unless ChemOnomatopist::element( $neighbours[0] ) eq ChemOnomatopist::element( $neighbours[1] ); my $outer = ChemOnomatopist::element( $neighbours[0] ); my $inner = ChemOnomatopist::element( $center ); return '' unless $elements{$outer}->{seniority} > $elements{$inner}->{seniority}; } return 1; } sub anything { 1 } my @rules = ( # Guanidine [ sub { &is_nongroup_atom && &is_C && &has_H0 }, ( sub { &is_nongroup_atom && &is_N } ) x 3, NO_MORE_VERTICES, sub { my $guanidine = ChemOnomatopist::Group::Guanidine->new( $_[0], $_[1] ); $_[0]->add_group( $guanidine ); $_[0]->delete_vertex( $_[1] ); for (combinations( [ @_[2..4] ], 2 )) { $_[0]->add_edge( @$_ ); } } ], # Chalcogen chains [ ( sub { &is_nongroup_atom && &is_O } ) x 3, NO_MORE_VERTICES, sub { $_[0]->add_group( ChemOnomatopist::Chain::Chalcogen->new( $_[0], undef, @_[1..3] ) ) } ], [ ( sub { &is_nongroup_atom && &is_S } ) x 3, NO_MORE_VERTICES, sub { $_[0]->add_group( ChemOnomatopist::Chain::Chalcogen->new( $_[0], undef, @_[1..3] ) ) } ], [ ( sub { &is_nongroup_atom && &is_Se } ) x 3, NO_MORE_VERTICES, sub { $_[0]->add_group( ChemOnomatopist::Chain::Chalcogen->new( $_[0], undef, @_[1..3] ) ) } ], [ ( sub { &is_nongroup_atom && &is_Te } ) x 3, NO_MORE_VERTICES, sub { $_[0]->add_group( ChemOnomatopist::Chain::Chalcogen->new( $_[0], undef, @_[1..3] ) ) } ], # Carboxylic acid [ sub { &is_nongroup_atom && &is_C }, sub { &is_hydroxy || &is_hydroperoxide || ( &is_nongroup_atom && &is_O && &charge_minus_one ) }, sub { &is_ketone || &is_hydrazine || &is_imine }, \&anything, NO_MORE_VERTICES, sub { graph_replace( $_[0], ChemOnomatopist::Group::Carboxyl->new( $_[2], $_[3] ), @_[1..3] ) } ], # Nitramide [ sub { &is_nongroup_atom && &is_N && &charge_plus_one }, ( sub { &is_nongroup_atom && &is_O && &has_1_neighbour } ) x 2, sub { &is_nongroup_atom && &is_N }, NO_MORE_VERTICES, sub { graph_replace( $_[0], ChemOnomatopist::Group::Nitramide->new, @_[1..4] ) } ], # Hydrazine and diazene [ ( sub { &is_nongroup_atom && &is_N } ) x 2, sub { is_double_bond( @_ ) ? $_[0]->add_group( ChemOnomatopist::Group::Diazene->new( @_[0..2] ) ) : $_[0]->add_group( ChemOnomatopist::Group::Hydrazine->new( @_[0..2] ) ) } ], # Hydrazide [ sub { &is_nongroup_atom && &is_C }, \&is_hydrazine, \&is_ketone, sub { my $hydrazine = first { $_->isa( ChemOnomatopist::Group::Hydrazine:: ) } $_[0]->groups( $_[2] ); my @vertices = $hydrazine->vertices; @vertices = reverse @vertices if $vertices[0] == $_[1]; my $hydrazide = ChemOnomatopist::Group::Hydrazide->new( $_[0], $_[3], @vertices ); $_[0]->delete_vertices( $_[3] ); $_[0]->add_group( $hydrazide ); $_[0]->delete_group( $hydrazine ); } ], # Amide # CHECKME: Why ketone has to be deleted and not used in replace? It fails somewhy. [ sub { &is_nongroup_atom && &is_C }, \&is_amine, \&is_ketone, sub { $_[0]->delete_vertices( $_[3] ); graph_replace( $_[0], ChemOnomatopist::Group::Amide->new( $_[1], $_[3] ), $_[2] ) } ], [ sub { &is_sulfinyl || &is_sulfonyl }, \&is_amine, sub { graph_replace( $_[0], ChemOnomatopist::Group::Amide->new( $_[1] ), $_[2] ) } ], # Aldehyde [ sub { &is_nongroup_atom && &is_C && &has_H1 }, \&is_ketone, sub { graph_replace( $_[0], ChemOnomatopist::Group::Aldehyde->new( $_[2] ), @_[1..2] ) } ], # Aldehydes attached to carbon in cyclic system or a heteroatom [ \&is_aldehyde, sub { &is_circular || ( &is_nongroup_atom && &is_heteroatom ) }, NO_MORE_VERTICES, sub { graph_replace( $_[0], ChemOnomatopist::Group::Carbaldehyde->new( $_[1] ), $_[1] ) } ], # Acid halide [ sub { &is_sulfinyl || &is_sulfonyl }, \&is_Br_Cl_F_I, sub { graph_replace( $_[0], ChemOnomatopist::Group::AcidHalide->new( $_[1], ChemOnomatopist::element( $_[2] ) ), @_[1..2] ) } ], # Acyl halide [ sub { &is_nongroup_atom && &is_C }, sub { &is_nongroup_atom && &is_Br_Cl_F_I }, sub { &is_ketone && &is_O }, \&is_C, NO_MORE_VERTICES, sub { graph_replace( $_[0], ChemOnomatopist::Group::AcylHalide->new( $_[2] ), @_[1..3] ) } ], [ sub { &is_nongroup_atom && &is_C }, sub { &is_cyanide || &is_isocyanide || &is_isocyanate }, sub { &is_ketone && &is_O }, \&is_C, NO_MORE_VERTICES, sub { graph_replace( $_[0], ChemOnomatopist::Group::AcylHalide->new( $_[2] ), @_[1..3] ) } ], # a(ba)n chain [ sub { &is_nongroup_atom && &is_heteroatom && &looks_like_ABA_chain }, ( sub { &is_nongroup_atom && &is_heteroatom } ) x 2, NO_MORE_VERTICES, sub { $_[0]->add_group( ChemOnomatopist::Chain::ABA->new( $_[0], $_[2], $_[1], $_[3] ) ) } ], [ sub { &is_nongroup_atom && &is_heteroatom && &looks_like_ABA_chain }, \&is_ABA_chain, sub { &is_nongroup_atom && &is_heteroatom }, NO_MORE_VERTICES, sub { for ($_[0]->groups( $_[2] )) { $_->add( $_[1] ); $_->add( $_[3] ) } } ], [ sub { &is_nongroup_atom && &is_heteroatom && &looks_like_ABA_chain }, ( \&is_ABA_chain ) x 2, NO_MORE_VERTICES, sub { my( $target ) = $_[0]->groups( $_[2] ); my( $source ) = $_[0]->groups( $_[3] ); $target->add( $_[1] ); $target->add( $source ); $_[0]->delete_group( $source ); } ], # O-based groups [ sub { &is_nongroup_atom && &is_O && ( &has_H1 || &charge_minus_one ) }, \&anything, NO_MORE_VERTICES, sub { graph_replace( $_[0], ChemOnomatopist::Group::Hydroxy->new( $_[1] ), $_[1] ) } ], [ sub { &is_nongroup_atom && &is_O && all { is_double_bond( @_, $_ ) } $_[0]->neighbours( $_[1] ) }, \&anything, NO_MORE_VERTICES, sub { graph_replace( $_[0], ChemOnomatopist::Group::Ketone->new( ChemOnomatopist::element( $_[1] ) ), $_[1] ) } ], # Ester [ sub { &is_nongroup_atom && &is_C }, sub { &is_ketone && &is_O }, sub { &is_nongroup_atom && &is_O && &no_charge }, \&is_C, NO_MORE_VERTICES, sub { $_[0]->delete_vertex( $_[2] ); $_[0]->add_group( ChemOnomatopist::Group::Ester->new( @_[0..3] ) ) } ], # Ether [ sub { &is_nongroup_atom && &is_O }, ( \&is_C ) x 2, NO_MORE_VERTICES, sub { graph_replace( $_[0], ChemOnomatopist::Group::Ether->new, $_[1] ) } ], # Hydroxy groups and their chalcogen analogues [ sub { &is_nongroup_atom && &is_O_S_Se_Te && ( &has_H1 || &charge_minus_one ) }, \&is_C_N_O_S_Se_Te, NO_MORE_VERTICES, sub { graph_replace( $_[0], ChemOnomatopist::Group::Hydroxy->new( $_[1] ), $_[1] ) } ], # Ketones and their chalcogen analogues [ sub { &is_nongroup_atom && &is_O_S_Se_Te && all { is_double_bond( @_, $_ ) } $_[0]->neighbours( $_[1] ) }, \&anything, NO_MORE_VERTICES, sub { graph_replace( $_[0], ChemOnomatopist::Group::Ketone->new( ChemOnomatopist::element( $_[1] ) ), $_[1] ) } ], # Urea [ sub { &is_nongroup_atom && &is_C }, \&is_ketone, ( sub { &is_nongroup_atom && &is_N } ) x 2, NO_MORE_VERTICES, sub { $_[0]->add_group( ChemOnomatopist::Group::Urea->new( @_ ) ) } ], # Isocyanide [ sub { &is_nongroup_atom && &is_C && &has_H0 && &charge_minus_one }, sub { &is_nongroup_atom && &is_N && &has_H0 && &charge_plus_one }, NO_MORE_VERTICES, sub { graph_replace( $_[0], ChemOnomatopist::Group::Isocyanide->new, @_[1..2] ) } ], # Isocyanate [ sub { &is_nongroup_atom && &is_C && &has_H0 && &no_charge }, \&is_ketone, \&is_N, NO_MORE_VERTICES, sub { graph_replace( $_[0], ChemOnomatopist::Group::Isocyanate->new( $_[2]->element ), @_[1..3] ) } ], # N-based groups [ sub { &is_nongroup_atom && &is_N && &has_H0 && &no_charge }, sub { &is_nongroup_atom && &is_C && &no_charge }, NO_MORE_VERTICES, sub { graph_replace( $_[0], ChemOnomatopist::Group::Cyanide->new, @_[1..2] ) } ], [ sub { &is_nongroup_atom && &is_N && &no_charge }, ( \&anything ) x 3, NO_MORE_VERTICES, sub { graph_replace( $_[0], ChemOnomatopist::Group::Amine->new, $_[1] ) } ], [ sub { &is_nongroup_atom && &is_N && &has_H1 }, ( \&anything ) x 2, NO_MORE_VERTICES, sub { graph_replace( $_[0], ChemOnomatopist::Group::Amine->new, $_[1] ) } ], [ sub { &is_nongroup_atom && &is_N && &has_H2 }, \&anything, NO_MORE_VERTICES, sub { graph_replace( $_[0], ChemOnomatopist::Group::Amine->new, $_[1] ) } ], [ sub { &is_nongroup_atom && &is_N && &has_H3 }, NO_MORE_VERTICES, sub { graph_replace( $_[0], ChemOnomatopist::Group::Amine->new, $_[1] ) } ], [ sub { &is_nongroup_atom && &is_N && &has_H0 && any { ChemOnomatopist::element( $_ ) eq 'C' && is_double_bond( @_, $_ ) && $_[0]->degree( $_ ) + ($_->{hcount} ? $_->{hcount} : 0) == 3 } $_[0]->neighbours( $_[1] ) }, \&is_C, \&anything, NO_MORE_VERTICES, sub { graph_replace( $_[0], ChemOnomatopist::Group::Imine->new, $_[1] ) } ], [ sub { &is_nongroup_atom && &is_N && &has_H1 && any { ChemOnomatopist::element( $_ ) eq 'C' && is_double_bond( @_, $_ ) && $_[0]->degree( $_ ) + ($_->{hcount} ? $_->{hcount} : 0) == 3 } $_[0]->neighbours( $_[1] ) }, \&is_C, NO_MORE_VERTICES, sub { graph_replace( $_[0], ChemOnomatopist::Group::Imine->new, $_[1] ) } ], [ sub { &is_nongroup_atom && &is_N && &charge_plus_one }, \&is_ketone, sub { &is_hydroxy && &is_O && &charge_minus_one }, \&anything, sub { graph_replace( $_[0], ChemOnomatopist::Group::Nitro->new, @_[1..3] ) } ], [ sub { &is_nongroup_atom && &is_N && &has_H0 && any { ChemOnomatopist::element( $_ ) =~ /^(S|Se|Te)$/ && is_double_bond( @_, $_ ) && $_[0]->degree( $_ ) + ($_->{hcount} ? $_->{hcount} : 0) == 3 } $_[0]->neighbours( $_[1] ) }, sub { &is_nongroup_atom && &is_S_Se_Te }, \&anything, NO_MORE_VERTICES, sub { $_[0]->add_group( ChemOnomatopist::Chain::Sulfimide->new( @_[0..2] ) ) } ], # Esters of nitric acid and nitrous acid [ sub { &is_nongroup_atom && &is_N && &charge_plus_one }, \&is_ketone, sub { &is_nongroup_atom && &is_O && &charge_minus_one }, \&is_O_S_Se_Te, NO_MORE_VERTICES, sub { die "cannot handle nitric/nitrous acid esters yet\n" } ], [ sub { &is_nongroup_atom && &is_N }, \&is_ketone, \&is_O_S_Se_Te, NO_MORE_VERTICES, sub { die "cannot handle nitric/nitrous acid esters yet\n" } ], # Carbonitrile, special case of cyanide [ \&is_cyanide, \&is_circular, NO_MORE_VERTICES, sub { graph_replace( $_[0], ChemOnomatopist::Group::Carbonitrile->new, $_[1] ) } ], [ \&is_cyanide, \&is_heteroatom, NO_MORE_VERTICES, sub { graph_replace( $_[0], ChemOnomatopist::Group::Carbonitrile->new, $_[1] ) } ], # Amidines (BBv3 P-66.4.1) [ sub { &is_nongroup_atom && &is_C }, sub { &is_amine && &is_nongroup }, sub { &is_imine && &is_nongroup }, sub { $_[0]->add_group( ChemOnomatopist::Group::Amidine->new( @_[0..3] ) ) } ], [ sub { &is_nongroup_atom && &is_S_Se_Te }, sub { &is_amine && &is_nongroup }, sub { &is_nongroup_atom && &is_N && &has_H1 }, \&anything, NO_MORE_VERTICES, sub { $_[0]->add_group( ChemOnomatopist::Group::Amidine->new( @_[0..3] ) ) } ], [ sub { &is_nongroup_atom && &is_S_Se_Te }, ( sub { &is_amine && &is_nongroup } ) x 2, sub { &is_nongroup_atom && &is_N && &has_H1 }, \&anything, NO_MORE_VERTICES, sub { $_[0]->add_group( ChemOnomatopist::Group::Amidine->new( @_[0..4] ) ) } ], # Nitroso and its analogues [ sub { &is_nongroup_atom && &is_Br_Cl_F_I_N }, \&is_ketone, \&is_C, NO_MORE_VERTICES, sub { graph_replace( $_[0], ChemOnomatopist::Group::Nitroso->new( ChemOnomatopist::element( $_[1] ) ), @_[1..2] ) } ], # XO3 [ sub { &is_nongroup_atom && &is_Br_Cl_F_I }, ( sub { &is_ketone && &is_O } ) x 3, sub { graph_replace( $_[0], ChemOnomatopist::Group::XO3->new( ChemOnomatopist::element( $_[1] ) ), @_[1..4] ) } ], # Peroxide [ sub { &is_nongroup_atom && &is_O }, sub { ( &is_nongroup_atom && &is_O ) || ( &is_hydroxy && &charge_minus_one ) }, sub { &is_C }, NO_MORE_VERTICES, sub { graph_replace( $_[0], ChemOnomatopist::Group::Peroxide->new( @_[1..2] ), @_[1..2] ) } ], # Hydroperoxide [ sub { &is_nongroup_atom && &is_O_S_Se_Te }, \&is_hydroxy, \&anything, NO_MORE_VERTICES, sub { graph_replace( $_[0], ChemOnomatopist::Group::Hydroperoxide->new( $_[1], $_[2] ), @_[1..2] ) } ], [ sub { &is_nongroup_atom && &is_O_S_Se_Te }, sub { &is_nongroup_atom && &is_O && &charge_minus_one }, \&anything, NO_MORE_VERTICES, sub { graph_replace( $_[0], ChemOnomatopist::Group::Hydroperoxide->new( $_[1], $_[2] ), @_[1..2] ) } ], # S-based groups [ sub { &is_nongroup_atom && &is_S_Se_Te }, sub { &is_ketone || ( &is_nongroup_atom && &is_N && &has_H1 ) || &is_hydrazine }, sub { &is_hydroxy || &is_hydroperoxide }, \&is_C, NO_MORE_VERTICES, sub { graph_replace( $_[0], ChemOnomatopist::Group::SulfinicAcid->new( ChemOnomatopist::element( $_[1] ), @_[2..3] ), @_[1..3] ) } ], [ sub { &is_nongroup_atom && &is_S_Se_Te }, ( sub { &is_ketone || ( &is_nongroup_atom && &is_N && &has_H1 ) || &is_hydrazine } ) x 2, sub { &is_hydroxy || &is_hydroperoxide }, \&is_C, NO_MORE_VERTICES, sub { graph_replace( $_[0], ChemOnomatopist::Group::SulfonicAcid->new( ChemOnomatopist::element( $_[1] ), @_[2..4] ), @_[1..4] ) } ], # Sulfoxide group and its analogues [ sub { &is_nongroup_atom && &is_S_Se_Te }, \&is_ketone, ( \&anything ) x 2, NO_MORE_VERTICES, sub { graph_replace( $_[0], ChemOnomatopist::Group::Sulfinyl->new( ChemOnomatopist::element( $_[1] ) ), @_[1..2] ) } ], [ sub { &is_nongroup_atom && &is_S_Se_Te }, ( \&is_ketone ) x 2, ( \&anything ) x 2, NO_MORE_VERTICES, sub { graph_replace( $_[0], ChemOnomatopist::Group::Sulfonyl->new( ChemOnomatopist::element( $_[1] ) ), @_[1..3] ) } ], # Noncarbon oxoacids [ sub { &is_nongroup_atom && &is_As_N_B_P_Se_Si_Sb_S_Te }, ( sub { &is_hydroxy && &is_O } ) x 4, sub { graph_replace( $_[0], ChemOnomatopist::Group::NoncarbonOxoacid->new( @_[1..5] ), @_[1..5] ) } ], [ sub { &is_nongroup_atom && &is_As_N_B_P_Se_Si_Sb_S_Te }, ( sub { &is_hydroxy && &is_O } ) x 2, ( sub { &is_ketone && &is_O } ) x 2, sub { graph_replace( $_[0], ChemOnomatopist::Group::NoncarbonOxoacid->new( @_[1..5] ), @_[1..5] ) } ], [ sub { &is_nongroup_atom && &is_As_N_B_P_Se_Si_Sb_S_Te }, ( sub { &is_hydroxy && &is_O } ) x 3, sub { &is_ketone && &is_O }, sub { graph_replace( $_[0], ChemOnomatopist::Group::NoncarbonOxoacid->new( @_[1..5] ), @_[1..5] ) } ], [ sub { &is_nongroup_atom && &is_As_N_B_P_Se_Si_Sb_S_Te }, ( sub { &is_hydroxy && &is_O } ) x 3, sub { graph_replace( $_[0], ChemOnomatopist::Group::NoncarbonOxoacid->new( @_[1..4] ), @_[1..4] ) } ], [ sub { &is_nongroup_atom && &is_As_N_B_P_Se_Si_Sb_S_Te }, ( sub { &is_hydroxy && &is_O } ) x 2, sub { &is_ketone && &is_O }, sub { graph_replace( $_[0], ChemOnomatopist::Group::NoncarbonOxoacid->new( @_[1..4] ), @_[1..4] ) } ], [ sub { &is_nongroup_atom && &is_As_N_B_P_Se_Si_Sb_S_Te }, ( sub { &is_ketone && &is_O } ) x 2, sub { &is_hydroxy && &is_O }, sub { graph_replace( $_[0], ChemOnomatopist::Group::NoncarbonOxoacid->new( @_[1..4] ), @_[1..4] ) } ], [ sub { &is_nongroup_atom && &is_As_N_B_P_Se_Si_Sb_S_Te }, ( sub { &is_hydroxy && &is_O } ) x 2, sub { graph_replace( $_[0], ChemOnomatopist::Group::NoncarbonOxoacid->new( @_[1..3] ), @_[1..3] ) } ], [ sub { &is_nongroup_atom && &is_As_N_B_P_Se_Si_Sb_S_Te }, sub { &is_hydroxy && &is_O }, sub { &is_ketone && &is_O }, sub { graph_replace( $_[0], ChemOnomatopist::Group::NoncarbonOxoacid->new( @_[1..3] ), @_[1..3] ) } ], [ sub { &is_nongroup_atom && &is_As_N_B_P_Se_Si_Sb_S_Te }, sub { &is_hydroxy && &is_O }, sub { graph_replace( $_[0], ChemOnomatopist::Group::NoncarbonOxoacid->new( @_[1..2] ), @_[1..2] ) } ], [ \&is_XO3, sub { &is_hydroxy && &is_O }, sub { graph_replace( $_[0], ChemOnomatopist::Group::NoncarbonOxoacid->new( @_[1..2] ), @_[1..2] ) } ], [ sub { &is_sulfinyl || &is_sulfonyl }, ( sub { &is_hydroxy && &is_O } ) x 2, sub { graph_replace( $_[0], ChemOnomatopist::Group::NoncarbonOxoacid->new( @_[1..3] ), @_[1..3] ) } ], [ \&is_nitro, sub { &is_hydroxy && &is_O }, sub { graph_replace( $_[0], ChemOnomatopist::Group::NoncarbonOxoacid->new( @_[1..2] ), @_[1..2] ) } ], # Sulfinamides and sulfonamides [ \&is_amide, \&is_sulfinyl, NO_MORE_VERTICES, sub { graph_replace( $_[0], ChemOnomatopist::Group::Sulfinamide->new( $_[2]->element ), @_[1..2] ) } ], [ \&is_amide, \&is_sulfonyl, NO_MORE_VERTICES, sub { graph_replace( $_[0], ChemOnomatopist::Group::Sulfonamide->new( $_[2]->element ), @_[1..2] ) } ], # Detecting amides attached to cyclic chains [ sub { &is_nongroup_atom && &is_C && 1 == grep { blessed $_ && $_->isa( ChemOnomatopist::Group::Amide:: ) && $_->{parent} == $_[1] } $_[0]->neighbours( $_[1] ) }, \&is_amide, \&is_monocycle, NO_MORE_VERTICES, sub { my( $cycle ) = $_[0]->groups( $_[3] ); $_[0]->delete_group( $cycle ); $_[0]->add_group( ChemOnomatopist::Chain::Carboxamide->new( $_[0], $_[2], $_[1], $cycle ) ) } ], ); # Old unused rules my @rules_old = ( [ \&is_C, \&is_benzene, \&is_ketone, \&is_N, NO_MORE_VERTICES, sub { graph_replace( $_[0], { type => 'benzamide' }, @_[1..4] ) } ], [ \&is_benzene, \&is_hydroxy, sub { graph_replace( $_[0], { type => 'phenol' }, @_[1..2] ) } ], ); sub is_mainchain() { any { $_->is_main } $_[0]->groups( $_[1] ) } sub is_carboxamide() { any { $_->isa( ChemOnomatopist::Chain::Carboxamide:: ) } $_[0]->groups( $_[1] ) } sub is_purine() { any { $_->isa( ChemOnomatopist::Chain::Bicycle::Purine:: ) } $_[0]->groups( $_[1] ) } sub most_senior_group() { my @most_senior_groups = map { $_->most_senior_groups } $_[0]->groups( $_[1] ); return shift @most_senior_groups } sub number_of_most_senior_groups() { scalar map { $_->most_senior_groups } $_[0]->groups( $_[1] ) } our @mainchain_rules = ( # Amide chains [ sub { &is_mainchain && !&is_carboxamide && &most_senior_group && &most_senior_group->isa( ChemOnomatopist::Group::Amide:: ) && &number_of_most_senior_groups == 1 }, sub { &is_amide && &is_nongroup }, sub { my( $chain ) = $_[0]->groups( $_[1] ); $_[0]->delete_group( $chain ); my $amide = ChemOnomatopist::Chain::Amide->new( $_[0], $chain, $_[2] ); $amide->{is_main} = 1; $_[0]->add_group( $amide ); } ], # Amine chains [ sub { &is_mainchain && !&is_purine && &most_senior_group && &most_senior_group->isa( ChemOnomatopist::Group::Amine:: ) && &number_of_most_senior_groups == 1 }, sub { &is_amine && &is_nongroup }, sub { my( $chain ) = $_[0]->groups( $_[1] ); $_[0]->delete_group( $chain ); my $amine = ChemOnomatopist::Chain::Amine->new( $_[0], $chain, $_[2] ); $amine->{is_main} = 1; $_[0]->add_group( $amine ); } ], # Imine chains [ sub { &is_mainchain && &most_senior_group && &most_senior_group->isa( ChemOnomatopist::Group::Imine:: ) && &number_of_most_senior_groups == 1 }, sub { &is_imine && &is_nongroup }, sub { my( $chain ) = $_[0]->groups( $_[1] ); $_[0]->delete_group( $chain ); my $imine = ChemOnomatopist::Chain::Imine->new( $_[0], $chain, $_[2] ); $imine->{is_main} = 1; $_[0]->add_group( $imine ); } ], # Unclaimed amine group attached to other chains [ sub { !&is_mainchain && &is_circular && &most_senior_group && &most_senior_group->isa( ChemOnomatopist::Group::Amine:: ) && &number_of_most_senior_groups == 1 }, sub { &is_amine && &is_nongroup }, sub { my( $chain ) = $_[0]->groups( $_[1] ); $_[0]->delete_group( $chain ); my $amine = ChemOnomatopist::Chain::Amine->new( $_[0], $chain, $_[2] ); $_[0]->add_group( $amine ); } ], # Unpack sidechain amides [ sub { !&is_mainchain && &is_amide && !&is_carboxamide }, sub { graph_replace( $_[0], ChemOnomatopist::Group::Amine->new, $_[1] ); $_[0]->set_edge_attribute( $_[1]->{parent}, $_[1]->{ketone}, 'bond', '=' ) if $_[1]->{ketone}; } ], ); sub parse_molecular_graph($) { my( $graph ) = @_; return parse_graph( $graph, @rules ); } 1; ChemOnomatopist-0.10.0/lib/ChemOnomatopist/Group.pm000066400000000000000000000136271463750375500222420ustar00rootroot00000000000000package ChemOnomatopist::Group; # ABSTRACT: Chemical group # VERSION use strict; use warnings; use ChemOnomatopist::Chain::Circular; use ChemOnomatopist::Group::AcidHalide; use ChemOnomatopist::Group::AcylHalide; use ChemOnomatopist::Group::Aldehyde; use ChemOnomatopist::Group::Amide; use ChemOnomatopist::Group::Amidine; use ChemOnomatopist::Group::Amine; use ChemOnomatopist::Group::Carbaldehyde; use ChemOnomatopist::Group::Carbonitrile; use ChemOnomatopist::Group::Carboxyl; use ChemOnomatopist::Group::Cyanide; use ChemOnomatopist::Group::Diazene; use ChemOnomatopist::Group::Ester; use ChemOnomatopist::Group::Ether; use ChemOnomatopist::Group::Guanidine; use ChemOnomatopist::Group::Hydrazide; use ChemOnomatopist::Group::Hydrazine; use ChemOnomatopist::Group::Hydroperoxide; use ChemOnomatopist::Group::Hydroxy; use ChemOnomatopist::Group::Imine; use ChemOnomatopist::Group::Isocyanate; use ChemOnomatopist::Group::Isocyanide; use ChemOnomatopist::Group::Ketone; use ChemOnomatopist::Group::Nitramide; use ChemOnomatopist::Group::Sulfinamide; use ChemOnomatopist::Group::Sulfonamide; use ChemOnomatopist::Group::SulfinicAcid; use ChemOnomatopist::Group::SulfonicAcid; use ChemOnomatopist::Group::Urea; use List::Util qw( any ); use Scalar::Util qw( blessed ); # Order from BBv2 P-41 our @order = ( # Radicals # Radical anions # Radical cations # Anions # Zwitterions # Cations # Acids (BBv2 P-42) ChemOnomatopist::Group::Carboxyl::, ChemOnomatopist::Group::SulfonicAcid::, ChemOnomatopist::Group::SulfinicAcid::, # Anhydrides ChemOnomatopist::Group::Ester::, # Acid halides and pseudohalides ChemOnomatopist::Group::AcidHalide::, ChemOnomatopist::Group::AcylHalide::, # Amides ChemOnomatopist::Group::Urea::, ChemOnomatopist::Group::Sulfonamide::, ChemOnomatopist::Group::Sulfinamide::, ChemOnomatopist::Group::Amide::, ChemOnomatopist::Group::Guanidine::, ChemOnomatopist::Group::Nitramide::, # FIXME: Is this correct? # Hydrazides ChemOnomatopist::Group::Hydrazide::, # Imides # 14. Nitriles ChemOnomatopist::Group::Carbonitrile::, ChemOnomatopist::Group::Cyanide::, ChemOnomatopist::Group::Isocyanate::, # CHECKME: Is this correct? ChemOnomatopist::Group::Isocyanide::, # CHECKME: Is this correct? ChemOnomatopist::Group::Carbaldehyde::, # CHECKME: How is this related to ChemOnomatopist::Group::Aldehyde? ChemOnomatopist::Group::Aldehyde::, ChemOnomatopist::Group::Ketone::, ChemOnomatopist::Group::Hydroxy::, ChemOnomatopist::Group::Hydroperoxide::, ChemOnomatopist::Group::Amidine::, # CHECKME: Is this correct? ChemOnomatopist::Group::Amine::, ChemOnomatopist::Group::Imine::, # TODO: Some are omitted # TODO: Classes denoted by the senior atom in heterane nomenclature should go here # 21. Nitrogen compounds ChemOnomatopist::Group::Hydrazine::, ChemOnomatopist::Group::Diazene::, # 41. Ethers, then sulfides, sulfoxides, sulfones; then selenides, selenoxides, etc. ChemOnomatopist::Group::Ether::, ); sub new { my( $class, $element ) = @_; return bless { element => $element }, $class; } sub element() { $_[0]->{element} } sub charge() { 0 } sub is_part_of_chain() { '' } # Certain groups can only be expressed as prefixes sub is_prefix_only() { '' } # Certain groups can only be terminal in chains sub is_terminal() { '' } sub needs_heteroatom_locants { 1 } sub needs_heteroatom_names { 1 } sub needs_multiple_bond_suffix { 1 } sub prefix() { '' } sub suffix() { $_[0]->is_prefix_only ? undef : '' } sub multisuffix() { $_[0]->suffix } sub suffix_if_cycle_substituent() { $_[0]->suffix } sub candidate_for() { my( $self ) = @_; return undef unless exists $self->{candidate_for}; return $self->{candidate_for}; } sub rule_greatest_number_of_most_senior_heteroatoms { my( @chains ) = @_; # This order is taken from BBv2 P-41 and is different from order in %elements my @element_order = qw( N P As Sb Bi Si Ge Sn Pb B Al Ga In Tl O S Se Te ); my %element_order = map { $element_order[$_] => $_ } 0..$#element_order; my( $most_senior ) = sort { $element_order{$a} <=> $element_order{$b} } grep { exists $element_order{$_} } map { $_->heteroatoms } @chains; return @chains unless $most_senior; my( $max_value ) = reverse sort map { scalar( grep { $_ eq $most_senior } $_->heteroatoms ) } @chains; return grep { scalar( grep { $_ eq $most_senior } $_->heteroatoms ) == $max_value } @chains; } # Compare seniority of two objects sub cmp { my( $A, $B ) = @_; my( $A_pos ) = grep { $A->isa( $order[$_] ) } 0..$#order; my( $B_pos ) = grep { $B->isa( $order[$_] ) } 0..$#order; # Clear distinction exists if( defined $A_pos && defined $B_pos && $A_pos <=> $B_pos ) { return $A_pos <=> $B_pos; } # Any of the objects is in the priority list if( defined $A_pos ^ defined $B_pos ) { return defined $B_pos <=> defined $A_pos; } # Same class; class should know how to compare if( blessed $A eq blessed $B ) { return $A->_cmp_instances( $B ); } # BBv2 P-41 # First, the chain with the most senior atom wins # FIXME: Select just by seniority, not by number my @chains = rule_greatest_number_of_most_senior_heteroatoms( $A, $B ); return ($chains[0] == $B) * 2 - 1 if @chains; # Second, the order is heterocycles, polyheteroatom, heteroatom if( $A->isa( ChemOnomatopist::Chain::Circular:: ) + 0 ^ $B->isa( ChemOnomatopist::Chain::Circular:: ) + 0 ) { return $B->isa( ChemOnomatopist::Chain::Circular:: ) <=> $A->isa( ChemOnomatopist::Chain::Circular:: ); } # TODO: The remaining rules from P-41 die "cannot compare\n"; } # Two instances of the same group are thought to be of the same seniority sub _cmp_instances { 0 } 1; ChemOnomatopist-0.10.0/lib/ChemOnomatopist/Group/000077500000000000000000000000001463750375500216735ustar00rootroot00000000000000ChemOnomatopist-0.10.0/lib/ChemOnomatopist/Group/AcidHalide.pm000066400000000000000000000010651463750375500242020ustar00rootroot00000000000000package ChemOnomatopist::Group::AcidHalide; # ABSTRACT: Acid halide group # VERSION use parent ChemOnomatopist::Group::; use ChemOnomatopist::Elements qw( %elements ); sub new { my( $class, $group, $element ) = @_; return bless { group => $group, element => $element }, $class; } sub element() { $_[0]->{group}->element } sub prefix() { $_[0]->suffix } sub suffix() { my( $self ) = @_; my $name = $self->{group}->prefix . ' '; my $halide = $elements{$self->{element}}->{prefix}; $halide =~ s/a$/ide/; return $name . $halide; } 1; ChemOnomatopist-0.10.0/lib/ChemOnomatopist/Group/AcylHalide.pm000066400000000000000000000021241463750375500242270ustar00rootroot00000000000000package ChemOnomatopist::Group::AcylHalide; use strict; use warnings; # ABSTRACT: Acyl halide group # VERSION use ChemOnomatopist::Elements qw( %elements ); use List::Util qw( any ); use Scalar::Util qw( blessed ); use parent ChemOnomatopist::Group::; sub new { my( $class, $halide ) = @_; return bless { halide => $halide }, $class; } sub element() { return 'C' } sub prefix() { my( $self ) = @_; my $name = 'carbono'; $name .= blessed $self->{halide} ? $self->{halide}->prefix : $elements{$self->{halide}{symbol}}->{prefix}; $name =~ s/a$/idoyl/; return $name; } sub suffix() { my( $self ) = @_; my $name = 'oyl '; $name .= blessed $self->{halide} ? $self->{halide}->prefix : $elements{$self->{halide}{symbol}}->{prefix}; $name =~ s/[ao]$/ide/; return $name; } sub _cmp_instances { my( $A, $B ) = @_; # Halides are either simple atoms or various cyano groups my $A_symbol = blessed $A->{halide} ? 'N' : $A->{halide}{symbol}; my $B_symbol = blessed $B->{halide} ? 'N' : $B->{halide}{symbol}; return $A_symbol cmp $B_symbol; } 1; ChemOnomatopist-0.10.0/lib/ChemOnomatopist/Group/Aldehyde.pm000066400000000000000000000012631463750375500237520ustar00rootroot00000000000000package ChemOnomatopist::Group::Aldehyde; # ABSTRACT: Aldehyde group # VERSION use strict; use warnings; use parent ChemOnomatopist::Group::; use ChemOnomatopist::Name; sub new { my( $class, $ketone ) = @_; return bless { ketone => $ketone }, $class; } sub element() { 'C' } sub is_part_of_chain() { 1 } sub prefix { ChemOnomatopist::Name->new( 'formyl' ) } sub suffix() { my( $self ) = @_; my $name = $self->{ketone}->suffix; $name =~ s/one$/al/; $name = 'selenal' if $name eq 'selal'; return $name; } sub multisuffix { ChemOnomatopist::Name->new( 'carbaldehyde' ) } sub suffix_if_cycle_substituent { ChemOnomatopist::Name->new( 'carbaldehyde' ) } 1; ChemOnomatopist-0.10.0/lib/ChemOnomatopist/Group/Amide.pm000066400000000000000000000013111463750375500232440ustar00rootroot00000000000000package ChemOnomatopist::Group::Amide; # ABSTRACT: Amide group # VERSION use strict; use warnings; use parent ChemOnomatopist::Group::; use ChemOnomatopist::Name; sub new { my( $class, $parent, $ketone ) = @_; return bless { parent => $parent, ketone => $ketone }, $class; } sub element() { 'N' } sub is_terminal() { 1 } sub prefix { ChemOnomatopist::Name->new( 'amido' ) } my %infix = ( S => 'thio', Se => 'seleno', Te => 'telluro', ); sub suffix { my( $self ) = @_; my $suffix = ChemOnomatopist::Name->new; if( $self->{ketone} && exists $infix{$self->{ketone}->element} ) { $suffix .= $infix{$self->{ketone}->element}; } return $suffix . 'amide'; } 1; ChemOnomatopist-0.10.0/lib/ChemOnomatopist/Group/Amidine.pm000066400000000000000000000064251463750375500236060ustar00rootroot00000000000000package ChemOnomatopist::Group::Amidine; # ABSTRACT: Amidine group # VERSION use strict; use warnings; use parent ChemOnomatopist::Group::, ChemOnomatopist::Chain::; use ChemOnomatopist::Name::Part::Locants; use ChemOnomatopist::Name::Part::Stem; use Clone qw( clone ); use List::Util qw( first ); use Scalar::Util qw( blessed ); use Set::Object qw( set ); sub new { my( $class, $graph, @vertices ) = @_; my( $central_atom, @others ) = @vertices; my $parent = first { !set( @others )->has( $_ ) } $graph->neighbours( $central_atom ); my $is_carboximidamide = $central_atom->{symbol} eq 'C'; if( $central_atom->{symbol} eq 'C' && (!$parent || !$graph->groups( $parent )) ) { $is_carboximidamide = ''; my $temp = clone $central_atom; for (@others) { next unless $graph->has_edge( $central_atom, $_ ); $graph->delete_edge( $central_atom, $_ ); $graph->add_path( $central_atom, $temp, $_ ); } @vertices = ( $temp, @others ); } return bless { graph => $graph, vertices => \@vertices, is_carboximidamide => $is_carboximidamide }, $class; } sub nonstandard_valence_positions() { my( $self ) = @_; return @{$self->{nonstandard_valence_positions}} if $self->{nonstandard_valence_positions}; my @vertices = $self->vertices; my @nonstandard_valence_positions; for (1..$#vertices) { # Nonstandard valence of the central atom is not important, hence skipped next if blessed $vertices[$_]; next if ChemOnomatopist::element( $vertices[$_] ) && ChemOnomatopist::element( $vertices[$_] ) eq 'C'; next unless exists $vertices[$_]->{valence}; push @nonstandard_valence_positions, $_; } $self->{nonstandard_valence_positions} = \@nonstandard_valence_positions; return @nonstandard_valence_positions; } sub needs_ane_suffix() { 1 } sub needs_heteroatom_locants() { '' } sub needs_heteroatom_names() { '' } sub needs_substituent_locants { '' } my %prefixes = ( C => '', S => 'sulf', Se => 'selen', Te => 'tellur' ); # CHECKME: Is Te correct? sub prefix() { my( $self ) = @_; my( $central_atom, @others ) = $self->vertices; return 'carbamimidoyl' unless $central_atom->{symbol} eq 'S'; my $N = grep { ChemOnomatopist::element( $_ ) eq 'N' } @others; my $O = grep { ChemOnomatopist::element( $_ ) eq 'O' } @others; my $name = ChemOnomatopist::Name::Part::Locants->new( 'S-' )->to_name; $name .= 'amino'; $name .= 'sulfon' if $N == 2 && $O == 1; $name .= 'sulfonodi' if $N == 3 && !$O; $name .= 'sulfin' if $N == 2 && !$O; $name .= 'imidoyl'; return $name; } sub suffix() { my( $self ) = @_; my( $central_atom, @others ) = $self->vertices; my $name = ChemOnomatopist::Name::Part::Stem->new( $prefixes{$central_atom->{symbol}} )->to_name; $name .= 'carbox' if $self->{is_carboximidamide}; if( $central_atom->{symbol} ne 'C' ) { my $N = grep { ChemOnomatopist::element( $_ ) eq 'N' } @others; my $O = grep { ChemOnomatopist::element( $_ ) eq 'O' } @others; $name .= 'in' if $N == 2; $name .= 'on' if $N == 2 && $O == 1; $name .= 'onodi' if $N == 3; } $name .= 'imidamide'; return $name; } 1; ChemOnomatopist-0.10.0/lib/ChemOnomatopist/Group/Amine.pm000066400000000000000000000005121463750375500232600ustar00rootroot00000000000000package ChemOnomatopist::Group::Amine; # ABSTRACT: Amino group # VERSION use strict; use warnings; use parent ChemOnomatopist::Group::; use ChemOnomatopist::Name; sub element() { 'N' } sub is_terminal() { 1 } sub prefix() { ChemOnomatopist::Name->new( 'amino' ) } sub suffix() { ChemOnomatopist::Name->new( 'amine' ) } 1; ChemOnomatopist-0.10.0/lib/ChemOnomatopist/Group/Carbaldehyde.pm000066400000000000000000000014771463750375500246110ustar00rootroot00000000000000package ChemOnomatopist::Group::Carbaldehyde; # ABSTRACT: Carbaldehyde group # VERSION use strict; use warnings; use parent ChemOnomatopist::Group::; use ChemOnomatopist::Elements qw( %elements ); use ChemOnomatopist::Name; sub new { my( $class, $aldehyde ) = @_; return bless { ketone => $aldehyde->{ketone} }, $class; } sub element() { 'C' } sub prefix() { my( $self ) = @_; return ChemOnomatopist::Name->new( 'formyl' ) if $self->{ketone}->element eq 'O'; my $name = ChemOnomatopist::Name->new( 'methane' ); my $element = $elements{$self->{ketone}->element}->{prefix}; $element =~ s/a$/oyl/; return $name->append_stem( $element ); } my %suffixes = ( O => '', S => 'othi', Se => 'oselen', Te => 'otellan' ); sub suffix() { 'carb' . $suffixes{$_[0]->{ketone}->element} . 'aldehyde' } 1; ChemOnomatopist-0.10.0/lib/ChemOnomatopist/Group/Carbonitrile.pm000066400000000000000000000004531463750375500246500ustar00rootroot00000000000000package ChemOnomatopist::Group::Carbonitrile; # ABSTRACT: Carbonitrile group # VERSION use strict; use warnings; use ChemOnomatopist::Name; use parent ChemOnomatopist::Group::; sub prefix { ChemOnomatopist::Name->new( 'cyano' ) } sub suffix { ChemOnomatopist::Name->new( 'carbonitrile' ) } 1; ChemOnomatopist-0.10.0/lib/ChemOnomatopist/Group/Carboxyl.pm000066400000000000000000000103551463750375500240200ustar00rootroot00000000000000package ChemOnomatopist::Group::Carboxyl; # ABSTRACT: Carboxyl group # VERSION use strict; use warnings; use parent ChemOnomatopist::Group::; use ChemOnomatopist::Elements qw( %elements ); use ChemOnomatopist::Group::Hydroperoxide; use ChemOnomatopist::Group::Ketone; use ChemOnomatopist::Name; use List::Util qw( all any uniq ); use Scalar::Util qw( blessed ); sub new() { my( $class, $hydroxy, $ketone ) = @_; die "cannot handle carboxyl group attached to nongroup atoms (most likely hydrazines)\n" unless blessed $ketone; die "cannot handle carboxyl group attached to anything else than ketone\n" unless $ketone->isa( ChemOnomatopist::Group::Ketone:: ); return bless { hydroxy => $hydroxy, ketone => $ketone }, $class; } sub element() { 'C' } sub element_suffix(@) { my @elements = @_; my @prefixes = sort map { s/a$/o/; $_ } map { $elements{$_}->{prefix} } grep { $_ ne 'O' } @elements; my $name = ChemOnomatopist::Name->new; if( @prefixes == 2 && uniq( @elements ) == 1 ) { $name->append_multiplier( 'di' ); shift @prefixes; } for (@prefixes) { $name->append_stem( $_ ); } return $name; } sub prefix() { my( $self ) = @_; my $hydroxy = $self->{hydroxy}; my $ketone = $self->{ketone}; my $name = ChemOnomatopist::Name->new; if( ChemOnomatopist::element( $hydroxy ) ne 'O' || $ketone->element ne 'O' ) { $name .= $hydroxy->prefix; $name->bracket unless $name->is_simple; $name->append_stem( 'carbono' ); $name .= $ketone->suffix if $ketone->element ne 'O'; } else { $name->append_stem( 'carboxy' ); } $name->[-1]{value} =~ s/(ne|o)$/yl/; return $name; } sub suffix() { my( $self ) = @_; my $hydroxy = $self->{hydroxy}; my $ketone = $self->{ketone}; my @elements = map { ChemOnomatopist::element( $_ ) } ( $hydroxy, $ketone ); if( all { $_ eq 'O' } @elements ) { return ChemOnomatopist::Name->new( 'oic acid' ) if blessed $hydroxy && !$hydroxy->charge; return ChemOnomatopist::Name->new( 'oate' ); } return element_suffix( @elements ) . 'ate' unless blessed $hydroxy && !$hydroxy->charge; return element_suffix( @elements ) . 'ic acid' if uniq( @elements ) == 1; return element_suffix( @elements ) . ('ic ' . $elements[0] . '-acid'); } sub multisuffix() { my( $self ) = @_; my $hydroxy = $self->{hydroxy}; my $ketone = $self->{ketone}; my $name = ChemOnomatopist::Name->new( 'carbo' ); if( blessed $hydroxy && $hydroxy->isa( ChemOnomatopist::Group::Hydroperoxide:: ) ) { my @hydroxy_elements = map { ChemOnomatopist::element( $_ ) } @{$hydroxy->{atoms}}; my $hydroxy_part = element_suffix( @hydroxy_elements ); my $ketone_part = element_suffix( $ketone->element ); $hydroxy_part .= $ketone->element eq 'O' ? 'peroxoic' : 'peroxo'; if( any { $_ ne 'O' } @hydroxy_elements ) { $hydroxy_part->bracket; } $ketone_part .= 'ic' if $ketone->element ne 'O'; $name .= $hydroxy_part; $name .= $ketone_part; local $" = ''; return $name . " @hydroxy_elements-acid" if scalar( uniq @hydroxy_elements ) > 1; return $name . ' acid'; } else { my @elements = ( ChemOnomatopist::element( $hydroxy ), $ketone->element ); if( all { $_ eq 'O' } @elements ) { return ChemOnomatopist::Name->new( 'carboxylic acid' ) if blessed $hydroxy && !$hydroxy->charge; return ChemOnomatopist::Name->new( 'carboxylate' ); } $name .= element_suffix( @elements ); return $name . 'ate' unless blessed $hydroxy && !$hydroxy->charge; return $name . 'ic acid' if uniq( @elements ) == 1; return $name . ('ic ' . $elements[0] . '-acid'); } } sub suffix_if_cycle_substituent() { $_[0]->multisuffix } # CHECKME: Not sure if the order is correct here sub _cmp_instances($$) { my( $A, $B ) = @_; return ChemOnomatopist::element( $A->{ketone} ) cmp ChemOnomatopist::element( $B->{ketone} ) || ChemOnomatopist::element( $A->{hydroxy} ) cmp ChemOnomatopist::element( $B->{hydroxy} ); } 1; ChemOnomatopist-0.10.0/lib/ChemOnomatopist/Group/Cyanide.pm000066400000000000000000000005431463750375500236070ustar00rootroot00000000000000package ChemOnomatopist::Group::Cyanide; # ABSTRACT: Cyanide group # VERSION use strict; use warnings; use ChemOnomatopist::Name; use parent ChemOnomatopist::Group::; sub prefix() { ChemOnomatopist::Name->new( 'cyano' ) } sub suffix() { ChemOnomatopist::Name->new( 'nitrile' ) } sub multisuffix() { ChemOnomatopist::Name->new( 'carbonitrile' ) } 1; ChemOnomatopist-0.10.0/lib/ChemOnomatopist/Group/Diazene.pm000066400000000000000000000017371463750375500236200ustar00rootroot00000000000000package ChemOnomatopist::Group::Diazene; # ABSTRACT: Diazene group # VERSION use strict; use warnings; use parent ChemOnomatopist::Group::, ChemOnomatopist::Chain::; use ChemOnomatopist::Name; use ChemOnomatopist::Name::Part::Multiplier; sub new { my( $class, $graph, @vertices ) = @_; return bless { graph => $graph, vertices => \@vertices }, $class; } sub candidates() { my( $self ) = @_; my @chains = ( $self, ChemOnomatopist::Group::Diazene->new( $self->graph, reverse $self->vertices ) ); $chains[1]->{candidate_for} = $self; return @chains; } sub needs_heteroatom_locants() { '' } sub needs_heteroatom_names() { '' } sub needs_suffix_locant() { $_[0]->number_of_branches != 2 } sub prefix() { ChemOnomatopist::Name::Part::Multiplier->new( 'di' )->to_name . ChemOnomatopist::Name->new( 'azenyl' ) } sub suffix() { ChemOnomatopist::Name::Part::Multiplier->new( 'di' )->to_name . ChemOnomatopist::Name->new( 'azene' ) } 1; ChemOnomatopist-0.10.0/lib/ChemOnomatopist/Group/Ester.pm000066400000000000000000000013411463750375500233120ustar00rootroot00000000000000package ChemOnomatopist::Group::Ester; # ABSTRACT: Ester group # VERSION use strict; use warnings; use parent ChemOnomatopist::Group::, ChemOnomatopist::Chain::; use ChemOnomatopist; sub new { my( $class, $graph, $C, $ketone, $O ) = @_; die "cannot name esters yet\n" if $ChemOnomatopist::CAUTIOUS; return bless { graph => $graph, vertices => [ $C, $O ] }, $class; } sub needs_heteroatom_locants() { '' } sub needs_heteroatom_names() { '' } sub needs_substituent_locants() { '' } sub prefix() { my( $self ) = @_; if( $self->parent && $self->graph->has_edge( $self->parent, $self->{vertices}[0] ) ) { die "cannot handle complicated esters\n"; } return 'oxy'; } sub suffix() { 'anoate' } 1; ChemOnomatopist-0.10.0/lib/ChemOnomatopist/Group/Ether.pm000066400000000000000000000005021463750375500232750ustar00rootroot00000000000000package ChemOnomatopist::Group::Ether; # ABSTRACT: Ether group # VERSION use strict; use warnings; use ChemOnomatopist::Name; use parent ChemOnomatopist::Group::; sub element() { 'O' } sub is_part_of_chain() { 1 } sub prefix() { ChemOnomatopist::Name->new( 'oxy' ) } sub suffix() { ChemOnomatopist::Name->new } 1; ChemOnomatopist-0.10.0/lib/ChemOnomatopist/Group/Guanidine.pm000066400000000000000000000040521463750375500241350ustar00rootroot00000000000000package ChemOnomatopist::Group::Guanidine; # ABSTRACT: Guanidine group # VERSION use strict; use warnings; use parent ChemOnomatopist::Group::, ChemOnomatopist::Chain::; use Algorithm::Combinatorics qw( permutations ); use ChemOnomatopist::Name; use Chemistry::OpenSMILES qw( is_double_bond ); sub new { my( $class, $graph, $atom ) = @_; # Double ! is used here as is_double_bond() returns 1 or undef which is a bug in Chemistry::OpenSMILES? my @vertices = sort { !!is_double_bond( $graph, $atom, $a ) <=> !!is_double_bond( $graph, $atom, $b ) } $graph->neighbours( $atom ); my @orders = map { !!is_double_bond( $graph, $atom, $_ ) } @vertices; return bless { graph => $graph, vertices => \@vertices, is_double_bond => \@orders }, $class; } sub candidates() { my( $self ) = @_; my @candidates; if( $self->{is_double_bond}[2] ) { @candidates = ( $self, $self->copy ); $candidates[1]->{vertices} = [ map { $self->{vertices}[$_] } ( 1, 0, 2 ) ]; $candidates[1]->{candidate_for} = $self; } else { for (permutations([0, 1, 2])) { push @candidates, $self->copy; $candidates[-1]->{vertices} = [ map { $self->{vertices}[$_] } @$_ ]; $candidates[-1]->{candidate_for} = $self; } } return @candidates; } sub copy() { my( $self ) = @_; return bless { %$self }; } sub needs_heteroatom_locants() { '' } sub needs_heteroatom_names() { '' } sub needs_substituent_locants() { 1 } # FIXME: There may be identical substituents, what to do then? sub locants(@) { my $self = shift; return map { 'N' . "'" x $_ } @_; } # Two kinds exist per BBv2 P-66.4.1.2.1.3 sub prefix { my( $self ) = @_; if( $self->{is_double_bond}[2] && $self->graph->degree( $self->{vertices}[2] ) > 2 ) { return ChemOnomatopist::Name->new( '[(diaminomethylidene)amino]' ); } else { return ChemOnomatopist::Name->new( '(carbamimidoylamino)' ); } } sub suffix { ChemOnomatopist::Name->new( 'guanidine' ) } 1; ChemOnomatopist-0.10.0/lib/ChemOnomatopist/Group/Hydrazide.pm000066400000000000000000000017351463750375500241620ustar00rootroot00000000000000package ChemOnomatopist::Group::Hydrazide; # ABSTRACT: Hydrazide group # VERSION use strict; use warnings; use ChemOnomatopist::Name; use parent ChemOnomatopist::Group::, ChemOnomatopist::Chain::; sub new { my( $class, $graph, $ketone, @vertices ) = @_; return bless { graph => $graph, ketone => $ketone, vertices => \@vertices }, $class; } sub needs_ane_suffix() { 1 } sub needs_heteroatom_locants() { '' } sub needs_heteroatom_names() { '' } sub needs_substituent_locants() { my( $self ) = @_; return $self->number_of_branches > 1 && $self->number_of_branches < $self->max_valence; } sub locants(@) { my $self = shift; return map { $_ == 0 ? "N'" : $_ == 1 ? 'N' : $_ - 1 } @_; } my %suffixes = ( O => '', S => 'thio', Se => 'seleno', Te => 'telluro' ); sub prefix() { ChemOnomatopist::Name->new( $suffixes{$_[0]->{ketone}->element} . 'hydrazidyl' ) } sub suffix() { ChemOnomatopist::Name->new( $suffixes{$_[0]->{ketone}->element} . 'hydrazide' ) } 1; ChemOnomatopist-0.10.0/lib/ChemOnomatopist/Group/Hydrazine.pm000066400000000000000000000022221463750375500241640ustar00rootroot00000000000000package ChemOnomatopist::Group::Hydrazine; # ABSTRACT: Hydrazine group # VERSION use strict; use warnings; use parent ChemOnomatopist::Group::, ChemOnomatopist::Chain::; use Chemistry::OpenSMILES qw( is_double_bond ); use List::Util qw( any first ); sub new { my( $class, $graph, @vertices ) = @_; return bless { graph => $graph, vertices => \@vertices }, $class; } sub candidates() { my( $self ) = @_; my @chains = ( $self, ChemOnomatopist::Group::Hydrazine->new( $self->graph, reverse $self->vertices ) ); $chains[1]->{candidate_for} = $self; return @chains; } sub needs_heteroatom_locants() { '' } sub needs_heteroatom_names() { '' } sub needs_substituent_locants() { my( $self ) = @_; my $graph = $self->graph; my $parent = $self->{parent}; if( $parent ) { my $vertex = first { $graph->has_edge( $parent, $_ ) } $self->vertices; return '' if is_double_bond( $self->graph, $vertex, $parent ); } return 1 if $self->number_of_isotopes; return $self->number_of_branches > 1 && $self->number_of_branches < $self->max_valence; } sub prefix() { 'hydrazinyl' } sub suffix() { 'hydrazine' } 1; ChemOnomatopist-0.10.0/lib/ChemOnomatopist/Group/Hydroperoxide.pm000066400000000000000000000047711463750375500250670ustar00rootroot00000000000000package ChemOnomatopist::Group::Hydroperoxide; # ABSTRACT: Hydroperoxide group # VERSION use strict; use warnings; use parent ChemOnomatopist::Group::; use ChemOnomatopist::Elements qw( %elements ); use ChemOnomatopist::Name; use List::Util qw( all any ); sub new { my( $class, @atoms ) = @_; return bless { atoms => \@atoms }, $class; } sub element() { $_[0]->{atoms}[0]{symbol} } sub is_terminal() { 1 } sub prefix() { my( $self ) = @_; my @elements = map { ChemOnomatopist::element( $_ ) } @{$self->{atoms}}; return ChemOnomatopist::Name->new( 'hydroperoxy' ) if all { $_ eq 'O' } @elements; if( all { $_ eq 'S' } @elements ) { my $name = ChemOnomatopist::Name->new; $name->append_multiplier( 'di' ); $name->append_stem( 'sulfanyl' ); return $name; } my $name = ChemOnomatopist::Name->new; for my $element (reverse @elements) { # FIXME: Incomplete if( $element eq 'O' ) { $name .= 'hydr' unless $name; $name->append_stem( 'oxy' ); } $name->append_stem( 'sulfanyl' ) if $element eq 'S'; $name->append_stem( 'selanyl' ) if $element eq 'Se'; $name->append_stem( 'tellanyl' ) if $element eq 'Te'; } return $name; } sub suffix() { my( $self ) = @_; my @elements = map { ChemOnomatopist::element( $_ ) } @{$self->{atoms}}; my $name = ChemOnomatopist::Name->new; if( all { $_ eq 'O' } @elements ) { $name->append_stem( 'peroxol' ); } else { if( $elements[0] eq $elements[1] ) { $name->append_multiplier( 'di' ); my $element_prefix = $elements{$elements[0]}->{prefix}; $element_prefix =~ s/a$/o/; $name .= $element_prefix; } else { $name = '-' . join( '', @elements ) . '-' . join '', sort map { s/a$/o/; $_ } map { $elements{$_}->{prefix} } grep { $_ ne 'O' } @elements; $name = ChemOnomatopist::Name->new( $name ); } $name->append_stem( 'peroxol' ); } if( any { exists $_->{charge} && $_->{charge} == -1 } @{$self->{atoms}} ) { $name .= 'ate'; } return $name; } # CHECKME: Not sure if this is right, but at least some tests pass now. sub _cmp_instances { my( $A, $B ) = @_; return scalar( grep { ChemOnomatopist::element( $_ ) eq 'O' } @{$B->{atoms}}) <=> scalar( grep { ChemOnomatopist::element( $_ ) eq 'O' } @{$A->{atoms}}); } 1; ChemOnomatopist-0.10.0/lib/ChemOnomatopist/Group/Hydroxy.pm000066400000000000000000000025361463750375500237050ustar00rootroot00000000000000package ChemOnomatopist::Group::Hydroxy; # ABSTRACT: Hydroxy group # VERSION use strict; use warnings; use ChemOnomatopist::Name; use ChemOnomatopist::Name::Part::Isotope; use parent ChemOnomatopist::Group::; sub new { my( $class, $atom ) = @_; return bless { atom => $atom }, $class; } sub element() { $_[0]->{atom}{symbol} } sub charge() { ChemOnomatopist::charge( $_[0]->{atom} ) } # From BBv2 P-63.1.5 my %prefixes = ( O => 'hydroxy', S => 'sulfanyl', Se => 'selanyl', Te => 'tellanyl' ); my %suffixes = ( O => 'ol', S => 'thiol', Se => 'selenol', Te => 'tellurol' ); sub prefix { my( $self ) = @_; return ChemOnomatopist::Name->new( $prefixes{$self->element} ); } sub suffix { my( $self ) = @_; my $suffix = ''; # FIXME: Isotopes have to come inside the same parenthesis if( exists $self->{atom}{isotope} ) { $suffix = '(' . $self->{atom}{isotope} . $self->element . ')'; } if( exists $self->{atom}{h_isotope} && @{$self->{atom}{h_isotope}} && defined $self->{atom}{h_isotope}[0] ) { $suffix = '(' . $self->{atom}{h_isotope}[0] . 'H)'; } my $name = ChemOnomatopist::Name->new; $name .= ChemOnomatopist::Name::Part::Isotope->new( $suffix ) if $suffix; return $name . $suffixes{$self->element}; } sub _cmp_instances { my( $A, $B ) = @_; return $A->element cmp $B->element } 1; ChemOnomatopist-0.10.0/lib/ChemOnomatopist/Group/Imine.pm000066400000000000000000000005631463750375500232760ustar00rootroot00000000000000package ChemOnomatopist::Group::Imine; # ABSTRACT: Imine group # VERSION use strict; use warnings; use parent ChemOnomatopist::Group::; use ChemOnomatopist::Name; sub element() { 'N' } sub is_terminal() { 1 } sub needs_multiple_bond_suffix() { '' } sub prefix() { ChemOnomatopist::Name->new( 'imino' ) } sub suffix() { ChemOnomatopist::Name->new( 'imine' ) } 1; ChemOnomatopist-0.10.0/lib/ChemOnomatopist/Group/Isocyanate.pm000066400000000000000000000010071463750375500243260ustar00rootroot00000000000000package ChemOnomatopist::Group::Isocyanate; # ABSTRACT: Isocyanate group # VERSION use strict; use warnings; use parent ChemOnomatopist::Group::; use ChemOnomatopist::Name; # Should be prefix-only as per P-61.8 sub is_prefix_only() { 1 } my %infix = ( O => '', S => 'thio', Se => 'seleno', Te => 'telluro', ); sub prefix() { ChemOnomatopist::Name->new( 'iso' . $infix{$_[0]->element} . 'cyanato' ) } sub suffix() { ChemOnomatopist::Name->new( 'iso' . $infix{$_[0]->element} . 'cyanate' ) } 1; ChemOnomatopist-0.10.0/lib/ChemOnomatopist/Group/Isocyanide.pm000066400000000000000000000003601463750375500243170ustar00rootroot00000000000000package ChemOnomatopist::Group::Isocyanide; # ABSTRACT: Isocyanide group # VERSION use strict; use warnings; use parent ChemOnomatopist::Group::; sub prefix() { 'isocyano' } sub suffix() { 'isocyanide' } # CHECKME: May be incorrect 1; ChemOnomatopist-0.10.0/lib/ChemOnomatopist/Group/Ketone.pm000066400000000000000000000013061463750375500234560ustar00rootroot00000000000000package ChemOnomatopist::Group::Ketone; # ABSTRACT: Ketone group # VERSION use strict; use warnings; use ChemOnomatopist::Name; use parent ChemOnomatopist::Group::; # From BBv2 P-64.6.1 my %prefixes = ( O => 'oxo', S => 'sulfanylidene', Se => 'selanylidene', Te => 'tellanylidene' ); my %suffixes = ( O => 'one', S => 'thione', Se => 'selone', Te => 'tellone' ); sub needs_multiple_bond_suffix { '' } sub prefix { my( $self ) = @_; return ChemOnomatopist::Name->new( $prefixes{$self->element} ); } sub suffix { my( $self ) = @_; return ChemOnomatopist::Name->new( $suffixes{$self->element} ); } sub _cmp_instances { my( $A, $B ) = @_; return $A->element cmp $B->element; } 1; ChemOnomatopist-0.10.0/lib/ChemOnomatopist/Group/Nitramide.pm000066400000000000000000000005011463750375500241410ustar00rootroot00000000000000package ChemOnomatopist::Group::Nitramide; # ABSTRACT: Nitramide group # VERSION use strict; use warnings; use parent ChemOnomatopist::Group::; use ChemOnomatopist::Name; sub element() { 'N' } sub prefix() { ChemOnomatopist::Name->new( 'nitramido' ) } sub suffix() { ChemOnomatopist::Name->new( 'nitramide' ) } 1; ChemOnomatopist-0.10.0/lib/ChemOnomatopist/Group/Nitro.pm000066400000000000000000000004241463750375500233240ustar00rootroot00000000000000package ChemOnomatopist::Group::Nitro; # ABSTRACT: Nitro group # VERSION use strict; use warnings; use parent ChemOnomatopist::Group::; use ChemOnomatopist::Name; sub element() { 'N' } sub is_prefix_only() { 1 } sub prefix { ChemOnomatopist::Name->new( 'nitro' ) } 1; ChemOnomatopist-0.10.0/lib/ChemOnomatopist/Group/Nitroso.pm000066400000000000000000000007301463750375500236660ustar00rootroot00000000000000package ChemOnomatopist::Group::Nitroso; use strict; use warnings; # ABSTRACT: Nitroso group or its analogue # VERSION use parent ChemOnomatopist::Group::; use ChemOnomatopist::Elements qw( %elements ); sub is_prefix_only() { return 1 } # Compiled from BBv2 Table 5.1 (P-59.1.9) sub prefix { my( $self ) = @_; return 'nitroso' if $self->element eq 'N'; my $prefix = $elements{$self->element}{prefix}; $prefix =~ s/a$/osyl/; return $prefix; } 1; ChemOnomatopist-0.10.0/lib/ChemOnomatopist/Group/NoncarbonOxoacid.pm000066400000000000000000000055611463750375500254660ustar00rootroot00000000000000package ChemOnomatopist::Group::NoncarbonOxoacid; # ABSTRACT: Mononuclear noncarbon oxoacid as per BBv3 P-67.1.1.1 # VERSION use strict; use warnings; use parent ChemOnomatopist::Group::; use ChemOnomatopist; use ChemOnomatopist::Group::Hydroxy; use ChemOnomatopist::Group::Ketone; use ChemOnomatopist::Group::Nitro; use ChemOnomatopist::Group::Sulfinyl; use ChemOnomatopist::Group::Sulfonyl; use ChemOnomatopist::Group::XO3; use Scalar::Util qw( blessed ); sub new { my( $class, $atom, @attachments ) = @_; return bless { attachments => \@attachments, atom => $atom }, $class; } sub element() { ChemOnomatopist::element( $_[0]->{atom} ) } sub is_part_of_chain() { 1 } my %elements = ( As => 'ars', N => 'az', B => 'bor', Br => 'brom', Cl => 'chlor', F => 'fluor', I => 'iod', P => 'phosph', Se => 'selen', Si => 'silic', Sb => 'stib', S => 'sulfur', Te => 'tellur', ); my %suffixes = ( 0 => { 1 => 'inous', 2 => 'onous', 3 => 'orous', 4 => 'ic' }, 1 => { 1 => 'inic', 2 => 'onic', 3 => 'oric' }, 2 => { 1 => 'ic', 2 => 'ic' }, ); sub suffix { my( $self ) = @_; my $name = $elements{$self->element}; if( blessed $self->{atom} && $self->{atom}->isa( ChemOnomatopist::Group::XO3:: ) ) { $name = 'per' . $name . 'ic'; } else { my @attachments = @{$self->{attachments}}; my $hydroxy = grep { blessed $_ && $_->isa( ChemOnomatopist::Group::Hydroxy:: ) } @attachments; my $ketones = grep { blessed $_ && $_->isa( ChemOnomatopist::Group::Ketone:: ) } @attachments; $hydroxy += 1 if blessed $self->{atom} && $self->{atom}->isa( ChemOnomatopist::Group::Nitro:: ); $ketones += 1 if blessed $self->{atom} && $self->{atom}->isa( ChemOnomatopist::Group::Sulfinyl:: ); $ketones += 2 if blessed $self->{atom} && $self->{atom}->isa( ChemOnomatopist::Group::Sulfonyl:: ); if( $self->element eq 'B' ) { $name .= 'inic' if $hydroxy == 1; $name .= 'onic' if $hydroxy == 2; $name .= 'ic' if $hydroxy == 3; } elsif( $self->element eq 'N' ) { $name .= 'inic' if $hydroxy == 2 && !$ketones; $name = 'nitric' if $hydroxy == 2 && $ketones == 1; $name .= 'onic' if $hydroxy == 3; $name = 'nitroric' if $hydroxy == 4; } elsif( $self->element =~ /^(F|Cl|Br|I)$/ ) { $name .= 'ous' if $hydroxy == 2 && $ketones == 1; $name .= 'ic' if $hydroxy == 1 && $ketones == 2; $name = 'hypo' . $name . 'ous' if $hydroxy == 1 && !$ketones; } elsif( $self->element =~ /^(S|Se|Te)$/ ) { $name .= 'ous' if $hydroxy == 2 && $ketones == 1; $name .= 'ic' if $hydroxy == 2 && $ketones == 2; } else { $name .= $suffixes{$ketones}->{$hydroxy}; } } return $name . ' acid'; } 1; ChemOnomatopist-0.10.0/lib/ChemOnomatopist/Group/Peroxide.pm000066400000000000000000000006301463750375500240070ustar00rootroot00000000000000package ChemOnomatopist::Group::Peroxide; # ABSTRACT: Peroxide group # VERSION use strict; use warnings; use parent ChemOnomatopist::Group::; use ChemOnomatopist::Name; sub new { my( $class, @atoms ) = @_; return bless { atoms => \@atoms }, $class; } sub element() { 'O' } sub prefix() { ChemOnomatopist::Name->new( 'peroxy' ) } sub suffix() { ChemOnomatopist::Name->new( 'peroxolate' ) } 1; ChemOnomatopist-0.10.0/lib/ChemOnomatopist/Group/Sulfinamide.pm000066400000000000000000000007171463750375500244760ustar00rootroot00000000000000package ChemOnomatopist::Group::Sulfinamide; # ABSTRACT: Sulfinamide group or its Se/Te equivalent # VERSION use strict; use warnings; use parent ChemOnomatopist::Group::; use ChemOnomatopist::Name; my %prefixes = ( S => 'sulfinamide', Se => 'seleninamide', Te => 'tellurinamide', ); sub prefix { ChemOnomatopist::Name->new( 'sulfinamido' ) } # FIXME: May be incorrect sub suffix { ChemOnomatopist::Name->new( $prefixes{$_[0]->element} ) } 1; ChemOnomatopist-0.10.0/lib/ChemOnomatopist/Group/SulfinicAcid.pm000066400000000000000000000005111463750375500245630ustar00rootroot00000000000000package ChemOnomatopist::Group::SulfinicAcid; # ABSTRACT: Sulfinic acid group # VERSION use strict; use warnings; use ChemOnomatopist::Group::SulfonicAcid; use parent ChemOnomatopist::Group::SulfonicAcid::; use ChemOnomatopist::Name; # From BBv2 P-65.3.0 sub element_prefix() { ChemOnomatopist::Name->new( 'sulfino' ) } 1; ChemOnomatopist-0.10.0/lib/ChemOnomatopist/Group/Sulfinyl.pm000066400000000000000000000005571463750375500240450ustar00rootroot00000000000000package ChemOnomatopist::Group::Sulfinyl; # ABSTRACT: Sulfinyl group # VERSION use strict; use warnings; use parent ChemOnomatopist::Group::; use ChemOnomatopist::Name; my %prefixes = ( S => 'sulfinyl', Se => 'seleninyl', Te => 'tellurinyl', ); sub is_prefix_only() { 1 } sub prefix { ChemOnomatopist::Name->new( $prefixes{$_[0]->element} ) } 1; ChemOnomatopist-0.10.0/lib/ChemOnomatopist/Group/Sulfonamide.pm000066400000000000000000000007151463750375500245020ustar00rootroot00000000000000package ChemOnomatopist::Group::Sulfonamide; # ABSTRACT: Sulfonamide group or its Se/Te equivalent # VERSION use strict; use warnings; use parent ChemOnomatopist::Group::; use ChemOnomatopist::Name; my %prefixes = ( S => 'sulfonamide', Se => 'selenonamide', Te => 'telluronamide', ); sub prefix { ChemOnomatopist::Name->new( 'sulfamoyl' ) } # FIXME: May be incorrect sub suffix { ChemOnomatopist::Name->new( $prefixes{$_[0]->element} ) } 1; ChemOnomatopist-0.10.0/lib/ChemOnomatopist/Group/SulfonicAcid.pm000066400000000000000000000111331463750375500245730ustar00rootroot00000000000000package ChemOnomatopist::Group::SulfonicAcid; # ABSTRACT: Sulfonic acid group # VERSION use strict; use warnings; use parent ChemOnomatopist::Group::; use ChemOnomatopist::Name; use ChemOnomatopist::Util qw( array_frequencies ); use List::Util qw( all any first uniq ); use Scalar::Util qw( blessed ); sub new { my( $class, $element, @attachments ) = @_; return bless { attachments => \@attachments, element => $element }, $class; } sub hydroxy() { my( $self ) = @_; return first { blessed $_ && ( $_->isa( ChemOnomatopist::Group::Hydroxy:: ) || $_->isa( ChemOnomatopist::Group::Hydroperoxide:: ) ) } @{$self->{attachments}}; } my %suffixes = ( N => 'imido', O => '', S => 'thio', Se => 'seleno', Te => 'telluro' ); sub attachments_part() { my( $self ) = @_; my @attachments = @{$self->{attachments}}; my $hydroxy = $self->hydroxy; my @non_hydroxy = grep { $_ != $hydroxy } @attachments; my @non_hydroxy_elements = map { ChemOnomatopist::element( $_ ) } @non_hydroxy; my %elements = array_frequencies @non_hydroxy_elements; if( $hydroxy->isa( ChemOnomatopist::Group::Hydroxy:: ) ) { $elements{$hydroxy->element}++; } my @names; for (keys %elements) { next unless $suffixes{$_}; my $name = ChemOnomatopist::Name->new; $name->append_multiplier( ChemOnomatopist::IUPAC_numerical_multiplier( $elements{$_} ) ) if $elements{$_} > 1; $name->append_element( $suffixes{$_} ); push @names, $name; } if( $hydroxy->isa( ChemOnomatopist::Group::Hydroperoxide:: ) ) { my $suffix = $hydroxy->suffix; $suffix->[ 0]{value} =~ s/^-[^\-]+-//; $suffix->[-1]{value} =~ s/l$//; $suffix->bracket unless $suffix eq 'peroxo'; # non-OO needs brackets push @names, $suffix; } my $name = ChemOnomatopist::Name->new; for (sort { _cmp_names( $a, $b ) } @names) { $name->[-1]{value} =~ s/o$// if @$name && $_ eq 'imido'; $name .= $_; } if( $name =~ /\)$/ ) { $name->[-2]{value} .= 'ic'; $name .= ' '; } else { $name->[-1]{value} =~ s/(imid)o$/$1/; $name .= 'ic '; } return $name; } # From BBv2 P-65.3.0 and Table 4.3 # FIXME: prefix() has to enumerate elements in the attachments sub element_prefix() { my( $self ) = @_; my $suffix = $suffixes{$self->element}; $suffix = 'sulfo' if $self->element eq 'S'; $suffix = 'selenono' if $self->element eq 'Se'; return ChemOnomatopist::Name->new( $suffix ); } sub prefix() { $_[0]->element_prefix } sub suffix() { my( $self ) = @_; my @attachments = @{$self->{attachments}}; my $hydroxy = first { blessed $_ && ( $_->isa( ChemOnomatopist::Group::Hydroxy:: ) || $_->isa( ChemOnomatopist::Group::Hydroperoxide:: ) ) } @attachments; my @non_hydroxy = grep { $_ != $hydroxy } @attachments; my @non_hydroxy_elements = map { ChemOnomatopist::element( $_ ) } @non_hydroxy; if( $hydroxy->isa( ChemOnomatopist::Group::Hydroxy:: ) && $hydroxy->element eq 'O' && all { $_ eq 'O' } @non_hydroxy_elements ) { my $name = $self->element_prefix; $name->[-1]{value} =~ s/no$//; return $name .= 'nic acid'; } my $name = $self->element_prefix; my $attachments_part = $self->attachments_part; $name .= $attachments_part =~ /^i/ ? 'n' : 'no'; $name .= $self->attachments_part; if( $hydroxy->isa( ChemOnomatopist::Group::Hydroxy:: ) ) { # Needed if at least one non-hydroxy element is different (and not N) if( any { $_ ne 'N' && $_ ne $hydroxy->element } @non_hydroxy_elements ) { $name .= $hydroxy->element . '-'; } } else { # Peroxides need explicit elements if: # a) elements are different # b) elements are OO and there is a non-N and non-O element among non-hydroxy elements my @elements = map { ChemOnomatopist::element( $_ ) } @{$hydroxy->{atoms}}; if( scalar( uniq @elements ) == 2 || ((all { $_ eq 'O' } @elements) && any { $_ ne 'N' && $_ ne 'O' } @non_hydroxy_elements) ) { $name .= join( '', @elements ) . '-'; } } return $name . 'acid'; } sub _cmp_names { my( $A, $B ) = @_; my @A = @$A; my @B = @$B; shift @A if $A->starts_with_multiplier; shift @B if $B->starts_with_multiplier; if( $A->is_enclosed ) { shift @A; pop @A; } if( $B->is_enclosed ) { shift @B; pop @B; } local $" = ''; return "@A" cmp "@B"; } 1; ChemOnomatopist-0.10.0/lib/ChemOnomatopist/Group/Sulfonyl.pm000066400000000000000000000005611463750375500240460ustar00rootroot00000000000000package ChemOnomatopist::Group::Sulfonyl; # ABSTRACT: Sulfonyl group # VERSION use strict; use warnings; use parent ChemOnomatopist::Group::; use ChemOnomatopist::Name; my %prefixes = ( S => 'sulfonyl', Se => 'selenonyl', Te => 'telluronyl', ); sub prefix() { ChemOnomatopist::Name->new( $prefixes{$_[0]->element} ) } sub is_prefix_only() { 1 } 1; ChemOnomatopist-0.10.0/lib/ChemOnomatopist/Group/Urea.pm000066400000000000000000000030631463750375500231270ustar00rootroot00000000000000package ChemOnomatopist::Group::Urea; # ABSTRACT: Urea group # VERSION use strict; use warnings; use ChemOnomatopist::Elements qw( %elements ); use ChemOnomatopist::Group::Amide; use ChemOnomatopist::Name; use List::Util qw( any first ); use Scalar::Util qw( blessed ); use parent ChemOnomatopist::Group::, ChemOnomatopist::Chain::; sub new { my( $class, $graph, @vertices ) = @_; my $ketone = first { blessed $_ && $_->isa( ChemOnomatopist::Group::Ketone:: ) } @vertices; if( $graph->degree( $vertices[3] ) > $graph->degree( $vertices[2] ) ) { @vertices[2..3] = reverse @vertices[2..3]; } return bless { graph => $graph, vertices => \@vertices, ketone_element => $ketone->{element} }, $class; } sub heteroatom_positions() { my @not_applicable } sub needs_substituent_locants() { my( $self ) = @_; return '' if !$self->is_main && $self->number_of_branches == 2; return 1 if $self->number_of_branches > 1 && $self->number_of_branches < 4; return 1 if any { $_->has_substituent_locant } map { @$_ } $self->locant_names; return ''; } sub locants(@) { my $self = shift; return map { $_ ? 'N' . '\'' x ($_ - 2) : 1 } @_; } sub prefix() { ChemOnomatopist::Name->new( 'carbamoylamino' ) } sub suffix() { my( $self ) = @_; my $name = ChemOnomatopist::Name->new; if( $self->{ketone_element} ne 'O' ) { $name->append_element( $elements{$self->{ketone_element}}->{prefix} ); $name->[-1]->{value} =~ s/a$/o/; } return $name . 'urea'; } 1; ChemOnomatopist-0.10.0/lib/ChemOnomatopist/Group/XO3.pm000066400000000000000000000007111463750375500226410ustar00rootroot00000000000000package ChemOnomatopist::Group::XO3; # ABSTRACT: XO3 group # VERSION use strict; use warnings; use ChemOnomatopist::Elements qw( %elements ); use ChemOnomatopist::Name; use parent ChemOnomatopist::Group::; sub is_prefix_only() { 1 } # Compiled from BBv2 Table 5.1 (P-59.1.9) sub prefix { my( $self ) = @_; my $prefix = 'per' . $elements{$self->element}{prefix}; $prefix =~ s/a$/yl/; return ChemOnomatopist::Name->new( $prefix ); } 1; ChemOnomatopist-0.10.0/lib/ChemOnomatopist/Isotope.pm000066400000000000000000000014631463750375500225630ustar00rootroot00000000000000package ChemOnomatopist::Isotope; # ABSTRACT: Isotope # VERSION use strict; use warnings; use Chemistry::Isotope qw( isotope_abundance ); sub new { my( $class, $element, $mass_number, $index, $locant ) = @_; return bless { element => $element, mass_number => $mass_number, index => $index, locant => $locant }, $class; } sub atomic_number() { my( $self ) = @_; my $abundance = isotope_abundance( $self->element ); my( $most_abundant ) = sort { $abundance->{$b} <=> $abundance->{$a} } keys %$abundance; return $most_abundant + 0; } sub element() { $_[0]->{element} } sub index() { $_[0]->{index} } sub locant() { $_[0]->{locant} } sub mass_number() { $_[0]->{mass_number} } 1; ChemOnomatopist-0.10.0/lib/ChemOnomatopist/MolecularGraph.pm000066400000000000000000000052141463750375500240440ustar00rootroot00000000000000package ChemOnomatopist::MolecularGraph; # ABSTRACT: Graph extension for molecular graphs # VERSION use strict; use warnings; use ChemOnomatopist::Util::Graph; use Graph::Undirected; use List::Util qw( all any ); use Set::Object qw( set ); use parent Graph::Undirected::; sub new { my $class = shift; if( ref $class ) { return bless $class->SUPER::new( refvertexed => 1 ); } elsif( @_ == 1 && $_[0]->isa( Graph::Undirected:: ) ) { return bless $_[0], $class; } else { return bless Graph::Undirected->new( @_, refvertexed => 1 ), $class; } } # Graph::copy() does not copy edge attributes sub copy() { my( $self ) = @_; my $copy = $self->SUPER::copy; for my $edge ($self->edges) { next unless $self->has_edge_attributes( @$edge ); $copy->set_edge_attributes( @$edge, $self->get_edge_attributes( @$edge ) ); } for my $attribute ($self->get_graph_attribute_names) { next if $attribute =~ /^_/; # Skip internal attributes $copy->set_graph_attribute( $attribute, $self->get_graph_attribute( $attribute ) ); } return $copy; } sub subgraph() { my( $self, @vertices ) = @_; if( all { ref $_ eq 'ARRAY' } @vertices ) { return bless $self->SUPER::subgraph( @vertices ); } else { return ChemOnomatopist::Util::Graph::subgraph( $self, @vertices ); } } sub add_group($) { my( $self, $group ) = @_; $self->set_graph_attribute( 'groups', [] ) unless $self->has_graph_attribute( 'groups' ); push @{$self->get_graph_attribute( 'groups' )}, $group; } sub delete_group($) { my( $self, $group ) = @_; return unless $self->has_graph_attribute( 'groups' ); @{$self->get_graph_attribute( 'groups' )} = grep { $_ != $group } @{$self->get_graph_attribute( 'groups' )}; } sub groups(@) { my( $self, @vertices ) = @_; return () unless $self->has_graph_attribute( 'groups' ); if( @vertices ) { my $vertices = set( @vertices ); return grep { $vertices <= set( $_->vertices ) } @{$self->get_graph_attribute( 'groups' )}; } else { return @{$self->get_graph_attribute( 'groups' )}; } } sub has_negative_charge() { my( $self ) = @_; return any { $_->{charge} && $_->{charge} < 0 } $self->vertices; } sub has_positive_charge() { my( $self ) = @_; return any { $_->{charge} && $_->{charge} > 0 } $self->vertices; } sub is_anion() { $_[0]->has_negative_charge && !$_[0]->has_positive_charge } sub is_cation() { !$_[0]->has_negative_charge && $_[0]->has_positive_charge } sub is_zwitterion() { $_[0]->has_negative_charge && $_[0]->has_positive_charge } 1; ChemOnomatopist-0.10.0/lib/ChemOnomatopist/Name.pm000066400000000000000000000212741463750375500220230ustar00rootroot00000000000000package ChemOnomatopist::Name; # ABSTRACT: Chemical name # VERSION use strict; use warnings; use overload '.' => \&concatenate; use overload '.=' => \&append; use overload '""' => sub { join '', @{$_[0]->{name}} }; use overload 'eq' => sub { "$_[0]" eq "$_[1]" }; use overload 'cmp' => sub { ("$_[0]" cmp "$_[1]") * ($_[2] ? -1 : 1) }; use overload '@{}' => sub { $_[0]->{name} }; use ChemOnomatopist::Name::Part::AlkaneANSuffix; use ChemOnomatopist::Name::Part::Element; use ChemOnomatopist::Name::Part::Fusion; use ChemOnomatopist::Name::Part::Isotope; use ChemOnomatopist::Name::Part::Locants; use ChemOnomatopist::Name::Part::Locants::Substituent; use ChemOnomatopist::Name::Part::Multiplier; use ChemOnomatopist::Name::Part::Stem; use Clone qw( clone ); use List::Util qw( any ); use Scalar::Util qw( blessed ); sub new { my( $class, $name ) = @_; my @name_parts; if( defined $name && blessed $name ) { push @name_parts, $name; } elsif( defined $name && $name ne '' ) { push @name_parts, ChemOnomatopist::Name::Part::Stem->new( $name ); } return bless { name => \@name_parts }, $class; } # TODO: Implement vowel elision as written in BBv2 P-16.7 sub append($) { my( $self, $name ) = @_; return $self if $name eq ''; # Do not do anything if name is empty $self->[-1] =~ s/a$// if $name =~ /^[ai]/ && @$self; $self->[-1] =~ s/o$// if $name =~ /^o/ && @$self && $self->[-1] ne 'cyclo'; # If names are combined and the second one starts with a number, a separator is added. if( @$self && !$self->ends_with_locant && blessed $name && $name->isa( ChemOnomatopist::Name:: ) && $name =~ /^\d/ ) { $name->[0]->{value} = '-' . $name->[0]->{value}; } # CHECKME: Not sure this is a broad rule, or just confined to P-16.7.1 (d) if( blessed $name && $name->isa( ChemOnomatopist::Name:: ) && $self->ends_with_element && $name->starts_with_element && $self =~ /a$/ && $name =~ /^o/ ) { $self->[-1]{value} =~ s/a$//; } # FIXME: The following needlessly converts $name into string $name->[0]{value} =~ s/^-// if @$self && $self->[-1] =~ /-$/ && $name =~ /^-/; push @$self, blessed $name && $name->isa( ChemOnomatopist::Name:: ) ? @$name : $name; return $self; } sub append_element($) { my( $self, $element ) = @_; return $self->append( ChemOnomatopist::Name::Part::Element->new( $element ) ); } sub append_locants { my( $self, @locants ) = @_; return $self unless @locants; if( @$self && !$self->ends_with_locant ) { $self->append( ChemOnomatopist::Name::Part::Locants->new( '-' . join( ',', @locants ) . '-' ) ); } else { $self->append( ChemOnomatopist::Name::Part::Locants->new( join( ',', @locants ) . '-' ) ); } return $self; } sub append_multiplier($) { my( $self, $string ) = @_; return $self if $string eq ''; return $self->append( ChemOnomatopist::Name::Part::Multiplier->new( $string ) ); } sub append_stem($) { my( $self, $stem ) = @_; return $self->append( ChemOnomatopist::Name::Part::Stem->new( $stem ) ); } sub append_substituent_locant($) { my( $self, $locant ) = @_; $self->append( ChemOnomatopist::Name::Part::Locants::Substituent->new( '-' . $locant . '-' ) ); $self->{name} = [ 'tert-but' ] if $self eq '2-methylpropan-2-'; return $self; } sub append_suffix($) { my( $self, $suffix ) = @_; if( @$self ) { if( ( blessed $suffix && $suffix->remove_isotopes =~ /^[aeiouy]/) || (!blessed $suffix && $suffix =~ /^[aeiouy]/) ) { $self->[-2] =~ s/e$// if $self =~ /e-[0-9,]+-$/; # BBv2 P-16.7.1 (a) $self->[-1] =~ s/e$// if $self =~ /e$/; } $self->[-1] =~ s/a$// if $self->ends_with_multiplier && $suffix =~ /^[ao]/; # BBv2 P-16.7.1 (b) } return $self->append( $suffix ); } # FIXME: Implement according to BBv2 P-16.5.4: {[({[( )]})]} sub bracket() { my( $self ) = @_; my $name_wo_fusion = clone $self; # BBv2 P-16.5.4.1.2: fusion indicators are ignored @$name_wo_fusion = grep { !blessed $_ || !$_->isa( ChemOnomatopist::Name::Part::Fusion:: ) } @$name_wo_fusion; if( $name_wo_fusion =~ /\{/ ) { unshift @$self, '('; push @$self, ')'; } elsif( $name_wo_fusion =~ /\[/ ) { unshift @$self, '{'; push @$self, '}'; } elsif( $name_wo_fusion =~ /\(/ ) { unshift @$self, '['; push @$self, ']'; } else { unshift @$self, '('; push @$self, ')'; } return $self; } sub bracket_numeric_locants() { my( $self ) = @_; if( $self->starts_with_locant && $self->[0]->is_numeric ) { $self->[0]->{value} = '[' . $self->[0]->{value}; $self->[0]->{value} =~ s/-$/]/; } for my $i (1..$#$self) { next unless blessed $self->[$i]; next unless $self->[$i]->isa( ChemOnomatopist::Name::Part::Locants:: ); $self->[$i]->{value} =~ s/^-//; $self->[$i]->{value} =~ s/-$//; $self->[$i]->{value} = '[' . $self->[$i]->{value} . ']'; } } sub concatenate($$@) { my( $A, $B, $reverse ) = @_; ( $A, $B ) = ( $B, $A ) if $reverse; $A = blessed $A ? clone( $A ) : ChemOnomatopist::Name->new( $A ); return $A->append( $B ); } sub pop_e() { my( $self ) = @_; return $self unless @$self; if( blessed $self->[-1] ) { $self->[-1]{value} =~ s/e$//; pop @$self if $self->[-1]{value} eq ''; } else { $self->[-1] =~ s/e$//; pop @$self if $self->[-1] eq ''; } return $self; } sub pop_yl() { my( $self ) = @_; return $self unless @$self; if( blessed $self->[-1] ) { $self->[-1]{value} =~ s/yl$//; pop @$self if $self->[-1]{value} eq ''; } else { $self->[-1] =~ s/yl$//; pop @$self if $self->[-1] eq ''; } return $self; } sub remove_isotopes() { my( $self ) = @_; my @parts = grep { !blessed $_ || !$_->isa( ChemOnomatopist::Name::Part::Isotope:: ) } @$self; return bless { name => \@parts }; } sub has_isotope() { my( $self ) = @_; return any { blessed $_ && $_->isa( ChemOnomatopist::Name::Part::Isotope:: ) } @$self; } sub has_locant() { my( $self ) = @_; return any { blessed $_ && $_->isa( ChemOnomatopist::Name::Part::Locants:: ) } @$self; } sub has_multiplier() { my( $self ) = @_; return any { blessed $_ && $_->isa( ChemOnomatopist::Name::Part::Multiplier:: ) } @$self; } sub has_substituent_locant() { my( $self ) = @_; return any { blessed $_ && $_->isa( ChemOnomatopist::Name::Part::Locants::Substituent:: ) } @$self; } sub is_enclosed() { my( $self ) = @_; return '' unless @$self; return $self->[0] =~ /^[\(\[\{]/ && $self->[-1] =~ /[\)\]\}]$/; } sub is_simple() { my( $self ) = @_; my $nelements = scalar grep { blessed $_ && $_->isa( ChemOnomatopist::Name::Part::Element:: ) } @$self; my $nstems = scalar grep { blessed $_ && $_->isa( ChemOnomatopist::Name::Part::Stem:: ) } @$self; return '' if $nstems >= 2; return '' if $nstems && $nelements; return 1; } # FIXME: Incomplete, untested and unused # 0 if simple, 1 if compound sub level() { my( $self ) = @_; return 0 + ((grep { blessed $_ && $_->isa( ChemOnomatopist::Name::Part::Stem:: ) } @$self) > 1); } sub ends_with_alkane_an_suffix() { my( $self ) = @_; return @$self && blessed $self->[-1] && $self->[-1]->isa( ChemOnomatopist::Name::Part::AlkaneANSuffix:: ); } sub ends_with_element() { my( $self ) = @_; return @$self && blessed $self->[-1] && $self->[-1]->isa( ChemOnomatopist::Name::Part::Element:: ); } sub starts_with_element() { my( $self ) = @_; return @$self && blessed $self->[0] && $self->[0]->isa( ChemOnomatopist::Name::Part::Element:: ); } sub starts_with_locant() { my( $self ) = @_; return @$self && blessed $self->[0] && $self->[0]->isa( ChemOnomatopist::Name::Part::Locants:: ); } sub starts_with_multiplier() { my( $self ) = @_; return @$self && blessed $self->[0] && $self->[0]->isa( ChemOnomatopist::Name::Part::Multiplier:: ); } sub ends_with_locant() { my( $self ) = @_; return @$self && blessed $self->[-1] && $self->[-1]->isa( ChemOnomatopist::Name::Part::Locants:: ); } sub ends_with_multiplier() { my( $self ) = @_; return @$self && blessed $self->[-1] && $self->[-1]->isa( ChemOnomatopist::Name::Part::Multiplier:: ); } sub ends_with_stem() { my( $self ) = @_; return @$self && blessed $self->[-1] && $self->[-1]->isa( ChemOnomatopist::Name::Part::Stem:: ); } 1; ChemOnomatopist-0.10.0/lib/ChemOnomatopist/Name/000077500000000000000000000000001463750375500214575ustar00rootroot00000000000000ChemOnomatopist-0.10.0/lib/ChemOnomatopist/Name/Part.pm000066400000000000000000000007171463750375500227300ustar00rootroot00000000000000package ChemOnomatopist::Name::Part; use strict; use warnings; # ABSTRACT: Semantic part of a chemical name # VERSION use ChemOnomatopist::Name; use overload '""' => sub { return $_[0]->{value} }; use overload 'cmp' => sub { return ("$_[0]" cmp "$_[1]") * ($_[2] ? -1 : 1) }; sub new { my( $class, $value ) = @_; return bless { value => $value }, $class; } sub to_name() { my( $self ) = @_; return ChemOnomatopist::Name->new( $self ); } 1; ChemOnomatopist-0.10.0/lib/ChemOnomatopist/Name/Part/000077500000000000000000000000001463750375500223655ustar00rootroot00000000000000ChemOnomatopist-0.10.0/lib/ChemOnomatopist/Name/Part/AlkaneANSuffix.pm000066400000000000000000000002471463750375500255250ustar00rootroot00000000000000package ChemOnomatopist::Name::Part::AlkaneANSuffix; use strict; use warnings; # ABSTRACT: Alkane AN suffix # VERSION use parent ChemOnomatopist::Name::Part::; 1; ChemOnomatopist-0.10.0/lib/ChemOnomatopist/Name/Part/Element.pm000066400000000000000000000002631463750375500243150ustar00rootroot00000000000000package ChemOnomatopist::Name::Part::Element; use strict; use warnings; # ABSTRACT: Element name inside a chemical name # VERSION use parent ChemOnomatopist::Name::Part::; 1; ChemOnomatopist-0.10.0/lib/ChemOnomatopist/Name/Part/Fusion.pm000066400000000000000000000002571463750375500241720ustar00rootroot00000000000000package ChemOnomatopist::Name::Part::Fusion; use strict; use warnings; # ABSTRACT: Fusion indicator for fused rings # VERSION use parent ChemOnomatopist::Name::Part::; 1; ChemOnomatopist-0.10.0/lib/ChemOnomatopist/Name/Part/Isotope.pm000066400000000000000000000002471463750375500243500ustar00rootroot00000000000000package ChemOnomatopist::Name::Part::Isotope; # ABSTRACT: Isotope part of a name. # VERSION use strict; use warnings; use parent ChemOnomatopist::Name::Part::; 1; ChemOnomatopist-0.10.0/lib/ChemOnomatopist/Name/Part/Locants.pm000066400000000000000000000003611463750375500243260ustar00rootroot00000000000000package ChemOnomatopist::Name::Part::Locants; use strict; use warnings; # ABSTRACT: Locants of a chemical name # VERSION use parent ChemOnomatopist::Name::Part::; sub is_numeric() { return $_[0]->{value} =~ /^\]?\d+(,\d+)*[-\]]?$/ } 1; ChemOnomatopist-0.10.0/lib/ChemOnomatopist/Name/Part/Locants/000077500000000000000000000000001463750375500237705ustar00rootroot00000000000000ChemOnomatopist-0.10.0/lib/ChemOnomatopist/Name/Part/Locants/Substituent.pm000066400000000000000000000003001463750375500266500ustar00rootroot00000000000000package ChemOnomatopist::Name::Part::Locants::Substituent; use strict; use warnings; # ABSTRACT: Locants of a chemical name # VERSION use parent ChemOnomatopist::Name::Part::Locants::; 1; ChemOnomatopist-0.10.0/lib/ChemOnomatopist/Name/Part/Multiplier.pm000066400000000000000000000002601463750375500250470ustar00rootroot00000000000000package ChemOnomatopist::Name::Part::Multiplier; use strict; use warnings; # ABSTRACT: Multiplier of a chemical name # VERSION use parent ChemOnomatopist::Name::Part::; 1; ChemOnomatopist-0.10.0/lib/ChemOnomatopist/Name/Part/NondetachablePrefix.pm000066400000000000000000000003031463750375500266240ustar00rootroot00000000000000package ChemOnomatopist::Name::Part::NondetachablePrefix; # ABSTRACT: Nondetachable prefix of a chemical name # VERSION use strict; use warnings; use parent ChemOnomatopist::Name::Part::; 1; ChemOnomatopist-0.10.0/lib/ChemOnomatopist/Name/Part/Stem.pm000066400000000000000000000002441463750375500236330ustar00rootroot00000000000000package ChemOnomatopist::Name::Part::Stem; use strict; use warnings; # ABSTRACT: Stem of a chemical name # VERSION use parent ChemOnomatopist::Name::Part::; 1; ChemOnomatopist-0.10.0/lib/ChemOnomatopist/Old.pm000066400000000000000000000710651463750375500216640ustar00rootroot00000000000000package ChemOnomatopist::Old; # ABSTRACT: Give molecule a name # VERSION use strict; use warnings; use parent ChemOnomatopist::; use ChemOnomatopist::Chain; use ChemOnomatopist::MolecularGraph; use ChemOnomatopist::Util qw( cmp_arrays ); use ChemOnomatopist::Util::Graph qw( BFS_calculate_chain_length BFS_is_chain_branched ); use Clone qw( clone ); use Graph::Traversal::BFS; use Graph::Undirected; use List::Util qw( all any max min sum0 uniq ); use Scalar::Util qw( blessed ); # There must be a nicer way to handle calls to parent... sub AUTOLOAD { our $AUTOLOAD; my $call = $AUTOLOAD; $call =~ s/.*:://; return if $call eq 'DESTROY'; return ChemOnomatopist->can( $call )->( @_ ); } no warnings 'recursion'; sub get_name { my( $what ) = @_; # Detect the type of the input data my( $graph ); if( blessed $what && $what->isa( Graph::Undirected:: ) ) { $graph = $what; } else { # Assume SMILES string require Chemistry::OpenSMILES::Parser; my $parser = Chemistry::OpenSMILES::Parser->new; ( $graph ) = map { ChemOnomatopist::MolecularGraph->new( $_ ) } $parser->parse( $what ); # Taking only the first graph } die "nothing supplied for get_name()\n" unless $graph; # Check if graph is a tree as trees are easy to process if( $graph->edges != $graph->vertices - 1 ) { # If it is not a tree, than the graph has cycles, and we have to # do our best to recognise them. To make it easier, hydrogen atoms # are removed here for now. $graph->delete_vertices( grep { $_->{symbol} eq 'H' } $graph->vertices ); if( $graph->edges != $graph->vertices ) { die "cannot handle cycles with attachments for now\n"; } if( any { uc $_->{symbol} ne 'C' } $graph->vertices ) { die "cannot handle heterocycles for now\n"; } if( all { $_->{symbol} eq 'C' } $graph->vertices ) { # Cycloalkane detected return 'cyclo' . alkane_chain_name( scalar $graph->vertices ) . 'ane'; } if( ( all { $_->{symbol} eq 'c' } $graph->vertices ) && ( scalar $graph->vertices ) =~ /^(4|6|8|10|12|14|16)$/ ) { # Annulene detected return 'cyclo' . IUPAC_numerical_multiplier( scalar $graph->vertices, 1 ) . IUPAC_numerical_multiplier( scalar $graph->vertices / 2, 1 ) . 'ene'; } # No other types of graphs with cycles can be processed for now die "only limited set of homocycles is supported for now\n"; } # Hydrogen atoms are no longer important $graph->delete_vertices( grep { is_element( $_, 'H' ) } $graph->vertices ); # Check for unsupported elements. if( any { !is_element( $_, 'C' ) } $graph->vertices ) { die "cannot handle atoms other than C and H now\n"; } my( $order ) = select_mainchain( $graph->copy ); my @chain; for my $curr_vertex (@$order) { my( $vertex ) = grep { $_->{number} == $curr_vertex } $graph->vertices; push @chain, $vertex; } return get_mainchain_name( $graph->copy, ChemOnomatopist::Chain->new( $graph, undef, @chain ) ); } # BFS is performed for the given graph after all vertices that are not carbons # removed sub BFS_order_carbons_only { my( $graph, $start ) = @_; $graph = $graph->copy; $graph->delete_vertices( grep { !is_element( $_, 'C' ) } $graph->vertices ); my $bfs; if( $start ) { $bfs = Graph::Traversal::BFS->new( $graph, start => $start ); } else { $bfs = Graph::Traversal::BFS->new( $graph ); } return $bfs->bfs; } # Subroutine gets an graph, removes all vertices that do not have C as their element. # Performs BFS on that chain. During BFS, distance from start is calculated to each vertice sub BFS_order_carbons_only_return_lengths { my ( $graph, $start ) = @_; my $carbon_graph = $graph->copy; $carbon_graph->delete_vertices( grep { !is_element( $_, 'C') } $carbon_graph->vertices ); my $is_any_visited; my %lengths; my $bfs = Graph::Traversal::BFS->new( $carbon_graph, pre => sub { if( !$is_any_visited ) { $lengths{$_[0]->{number}} = 0; $is_any_visited = 1; } }, tree_edge => sub { if( !defined $lengths{$_[0]->{number}} ) { ( $_[0], $_[1] ) = ( $_[1], $_[0] ); } $lengths{$_[1]->{number}} = $lengths{$_[0]->{number}} + 1; }, start => $start ); return \%lengths, $bfs->bfs; } # Returns main (parental) chain to be used during the naming sub select_mainchain { my( $graph ) = @_; my @order = BFS_order_carbons_only( $graph ); my $start = $order[-1]; my( $lengths, @second_order ) = BFS_order_carbons_only_return_lengths( $graph, $start ); my $end = $second_order[-1]; # Finding all farthest vertices from the starting point my @farthest = grep { $lengths->{$_} eq $lengths->{$end->{number}} } keys %$lengths; # Also adding the first vertice to the array since it is farthest from other # ones push @farthest, $start->{number}; # Make a carbon-only graph my $carbon_graph = $graph->copy; $carbon_graph->delete_vertices( grep { !is_element( $_, 'C' ) } $carbon_graph->vertices ); # Going through every vertice in "farthest" array and creating tree-like structures my @all_trees; for (my $i = 0; $i < scalar @farthest; $i++) { my %tree = ( $farthest[$i] => [ $farthest[$i], 0 ] ); my( $vertex ) = grep { $_->{number} eq $farthest[$i] } $carbon_graph->vertices; # Start creation of the tree from all the starting vertices push @all_trees, \%{create_tree( $carbon_graph->copy, $vertex, \%tree )}; } my $trees; # Extracts arrays of all longest chains with numbers of vertices (in order) # from each tree-like structure my @main_chains = prepare_paths( @all_trees ); # From all possible main chains in tree-like structures, subroutine returns # the ones that has the greatest number of side chains. Also returns only the # trees that still have some possible main chains after selection ( $trees, @main_chains ) = rule_greatest_number_of_side_chains( $carbon_graph->copy, \@main_chains, @all_trees, ); return @{$main_chains[0]} if scalar @{$main_chains[0]} == 1; # If more than one chain is left, second rule is applied. # From all main chains left, subroutine selects all that have the # lowest numbered locants. Also the trees that have possible main # chains returned ( $trees, @main_chains ) = rule_lowest_numbered_locants( $carbon_graph->copy, @main_chains, @$trees, ); return @{$main_chains[0]} if scalar @{$main_chains[0]} == 1; # If more than one chain is left, third rule is applied. # From all main chains left, subroutine selects all that have the # most carbons in side chains. Also the trees that have possible main # chains returned ( $trees, @main_chains ) = rule_most_carbon_in_side_chains( $carbon_graph->copy, @main_chains, @$trees, ); return @{$main_chains[0]} if scalar @{$main_chains[0]} == 1; # If more than one chain is left, fourth rule is applied. # From all main chains left, subroutine selects all that have # the least branched side chains. Also the trees that have # possible main chains returned ( $trees, @main_chains ) = rule_least_branched_side_chains( $carbon_graph->copy, @main_chains, @$trees, ); return @{$main_chains[0]} if scalar @{$main_chains[0]} == 1; # If more than one chain is left, program states that there are # few chains that are identical by all the rules and selects # one from all that are left. # One main chain is picked from all that are left my $main_chain = rule_pick_chain_from_valid( $carbon_graph->copy, @main_chains, @$trees, ); return $main_chain; } # Creating tree like structure for all the longest paths in molecule sub create_tree { my( $graph, $atom, $tree ) = @_; my @neighbours = $graph->neighbours( $atom ); my @array = @{$tree->{$atom->{number}}}; my @new_array = ($atom->{number}); # Since first number in the stack-of-array boxes represents the parent of # the vertice, array with box information is shifted shift @array; # Box for the vertice is created by increasing box information in parental # box by one push @new_array, map { $_ + 1 } @array; # If there is one neighbour, it means that vertice do not have any branching. # Analysis of next vertice (the neighbour) is started if( scalar @neighbours == 1 ) { unless (exists $tree->{$neighbours[0]->{number}}) { $tree->{$neighbours[0]->{number}} = [ @new_array ]; $graph->delete_vertex( $atom ); create_tree( $graph, $neighbours[0], $tree ); } } # If there is more than one neighour for the current vertice, analysis of # each of them is started independently elsif( scalar @neighbours > 1 ) { push @new_array, 0; $graph->delete_vertex( $atom ); foreach my $neighbour ( @neighbours ) { $tree->{$neighbour->{number}} = [ @new_array ]; create_tree( $graph, $neighbour, $tree ); } } # If there is no neighbour or other vertices, all graph has been analyzed. # Created structure is returned return $tree; } # Create arrays of vertice numbers of all possible longest chains in the tree sub prepare_paths { my( @trees ) = @_; my $trees_copy = clone \@trees; my @all_chains; foreach my $tree ( @$trees_copy ) { my %structure = %$tree; # Tree-like structure is sorted by parental vertex number my @sorted = sort { $structure{$a}->[1] <=> $structure{$b}->[1] } keys %structure; my $last = $sorted[-1]; my @chain_ending = grep { $structure{$_}->[1] == $structure{$last}->[1] } keys %structure; foreach my $ending ( @chain_ending ) { push @all_chains, save_main_chain_vertices_in_array( $ending, [$ending], $tree ); } } # Adds reverted chains if they are not present yet as the longest chains for my $chain (@all_chains) { next if array_exists( [reverse @$chain], @all_chains ); push @all_chains, [reverse @$chain]; } return @all_chains; } # Checks if array exists in array of arrays sub array_exists { my( $chain, @all_chains ) = @_; my $same; for my $curr_chain ( @all_chains ) { $same = 1; for my $index ( 0..scalar @{$curr_chain}-1 ) { if( $chain->[$index] != $curr_chain->[$index] ) { $same = 0; last; } } return 1 if $same; } return 0; } # Tries to find the chain which has the greatest number of side chains sub rule_greatest_number_of_side_chains { my( $graph, $chains, @trees ) = @_; my $trees_copy = clone \@trees; my $index = 0; my @number_of_side_chains; foreach my $tree ( @$trees_copy ) { my %structure = %{clone $tree}; # Reference to parental chain is removed from the boxes foreach my $key ( keys %structure ) { shift @{$structure{$key}}; } # Beginning of the structure is found. Then all chains that belongs to # the current tree are selected my @first = grep { $structure{$_}->[0] == 0 } keys %structure; my @chains_in_the_tree = grep { $_->[0] == $first[0] || $_->[-1] == $first[0] } @$chains; # Structure with index of the tree, beginning and ending of the chain, # number of side chains in the chain created for each chain for my $chain ( @chains_in_the_tree ) { push @number_of_side_chains, [$index, $chain->[0], $chain->[-1], find_number_of_side_chains( $graph, \@{$chain}, $tree ) ]; } $index++; } # All chains that have the biggest number of side chains are selected and returned my @sorted_numbers = sort { $a->[3] <=> $b->[3] } @number_of_side_chains; my $path_length = $sorted_numbers[-1][3]; my @biggest_number_of_side_chains = grep {$_->[3] == $path_length} @number_of_side_chains; my %seen; my @uniq_biggest_number_of_side_chains = grep { !$seen{$_->[0]}++ } @biggest_number_of_side_chains; my @result = @trees[map {$_->[0]} @uniq_biggest_number_of_side_chains]; my @eligible_chains; for my $chain (@$chains) { if( any {$_->[1] == $chain->[0] && $_->[2] == $chain->[-1]} @biggest_number_of_side_chains ) { push @eligible_chains, $chain; } } return \@result, \@eligible_chains; } # Tries to find the chain which has the lowest-numbered locants sub rule_lowest_numbered_locants { my ( $graph, $chains, @trees ) = @_; my $trees_copy = clone \@trees; my $index = 0; my @locant_placing; foreach my $tree ( @$trees_copy ) { my %structure = %{clone $tree}; # Reference to parental chain is removed from the boxes foreach my $key ( keys %structure ) { shift @{$structure{$key}}; } # Beginning of the structure is found. Then all chains that belongs to # the current tree are selected my @first = grep { $structure{$_}->[0] == 0 } keys %structure; my @chains_in_the_tree = grep { @{$_}[0] == $first[0] || @{$_}[-1] == $first[0] } @$chains; # Structure with index of the tree, beginning and ending of the chain, # places of the locants in the chain created for each tree for my $chain ( @chains_in_the_tree ) { push @locant_placing, [$index, $chain->[0], $chain->[-1], [find_locant_placing( $graph, $chain, )] ]; } $index++; } # All chains that have the lowest numbers of locants are selected and returned my @sorted_paths = sort compare_locant_placings reverse @locant_placing; my $lowest_locants = $sorted_paths[0][3]; my @lowest_locants_paths = grep { join( '', @{$_->[3]} ) eq join( '', @$lowest_locants ) } @locant_placing; my %seen; my @uniq_lowest_locants_paths = grep { !$seen{$_->[0]}++ } @lowest_locants_paths; my @result = @trees[map {$_->[0]} @uniq_lowest_locants_paths]; my @eligible_chains; for my $chain (@$chains) { if( any { $_->[1] == $chain->[0] and $_->[2] == $chain->[-1] } @lowest_locants_paths) { push @eligible_chains, $chain; } } return \@result, \@eligible_chains; } # Tries to find chain that has the greatest number of carbon atoms in the smaller # side chains sub rule_most_carbon_in_side_chains { my ( $graph, $chains, @trees ) = @_; my $trees_copy = clone \@trees; my @side_chain_lengths; my $index = 0; foreach my $tree (@$trees_copy) { my %structure = %{clone $tree}; my @all_vertices = keys %structure; # Reference to parental chain is removed from the boxes foreach my $key (keys %structure) { shift @{$structure{$key}}; } my @sorted = sort { @{$structure{$a}} <=> @{$structure{$b}} or $structure{$a}->[0] cmp $structure{$b}->[0] } keys %structure; my $last = $sorted[-1]; # Beginning of the structure is found. Then all chains that belongs to # the current tree are selected my( $first ) = grep { $structure{$_}->[0] == 0 } keys %structure; my @structure_chains = grep { $_->[0] == $first || $_->[-1] == $first } @$chains; # Structure with index of the tree, beginning and ending of the chain, # lengths of side chains of the chain created for each tree for my $chain (@structure_chains) { push @side_chain_lengths, [$index, $chain->[0], $chain->[-1], [find_lengths_of_side_chains( $graph->copy, $chain->[-1], \@{$chain}, [], $tree, scalar @{$chain} )] ]; } $index++; } # All chains that have the highest number of carbons in side chains are selected and returned my @sorted_final = sort compare_locant_placings @side_chain_lengths; my $last = $sorted_final[-1][3]; my @greatest_no_of_side_chains_paths = grep { join( '', @{$_->[3]} ) eq join( '', @$last ) } @sorted_final; my @eligible_chains; for my $chain (@{$chains}){ if( any { $_->[1] == $chain->[0] && $_->[2] == $chain->[-1] } @greatest_no_of_side_chains_paths ) { push @eligible_chains, $chain; } } my %seen; my @uniq_side_chain_paths = grep { !$seen{$_->[0]}++ } @greatest_no_of_side_chains_paths; my @result = @trees[map { $_->[0] } @uniq_side_chain_paths]; return \@result, \@eligible_chains; } # Tries to find chain that have the least branched side chains sub rule_least_branched_side_chains { my ( $graph, $chains, @trees ) = @_; my $trees_copy = clone \@trees; my $index = 0; my @number_of_branched_side_chains; foreach my $tree (@$trees_copy) { my %structure = %{clone $tree}; # Reference to parental chain is removed from the boxes foreach my $key (keys %structure) { shift @{$structure{$key}}; } # Beginning of the structure is found. Then all chains that belong to # the current tree are selected my( $first ) = grep { $structure{$_}->[0] == 0 } keys %structure; my @chains_in_the_tree = grep { $_->[0] == $first || $_->[-1] == $first } @$chains; for my $chain (@chains_in_the_tree) { push @number_of_branched_side_chains, [$index, $chain->[0], $chain->[-1], find_number_of_branched_side_chains( $graph, \@{$chain}, $tree ) ]; } $index++; } # All chains that have the least amount of branches side chains are selected and returned my @sorted_paths = sort { $a->[3] <=> $b->[3] } @number_of_branched_side_chains; my $path_length = $sorted_paths[0][3]; my @longest_paths = grep { $_->[3] == $path_length } @number_of_branched_side_chains; my %seen; my @uniq_longest_paths = grep { !$seen{$_->[0]}++ } @longest_paths; my @result = @trees[map { $_->[0] } @uniq_longest_paths]; my @eligible_chains; for my $chain (@$chains) { if( any { $_->[1] == $chain->[0] && $_->[2] == $chain->[-1] } @longest_paths ){ push @eligible_chains, $chain; } } return \@result, \@eligible_chains; } # Subroutine sorts all valid chains that are left and returns the first one - # the one that have carbons with lowest indexes if there is no differences # regarding the attachment names. If there is, then selects the ones that have # lowest attachment indexes with lowest attachments alphabetically sub rule_pick_chain_from_valid { my( $graph, $chains, @trees ) = @_; my( $chain ) = sort { cmp_arrays( $a, $b ) } pick_chain_with_lowest_attachments_alphabetically( $graph, $chains, @trees ); return $chain; } # Subroutine selects chain that has the lowest attachments by alpabetical naming sub pick_chain_with_lowest_attachments_alphabetically { my( $graph, $chains, @trees ) = @_; # Locant placements are found for all trees my @attachments; for my $i (0..$#trees) { my %structure = %{clone $trees[$i]}; # Reference to parental chain is removed from the boxes for my $key (keys %structure) { shift @{$structure{$key}}; } # Beginning of the structure is found. Then all chains that belongs to # the current tree are selected my( $first ) = grep { $structure{$_}->[0] == 0 } keys %structure; my @chains_in_the_tree = grep { $_->[0] == $first || $_->[-1] == $first } @$chains; # Placings of the locants found for each chain for my $chain (@chains_in_the_tree) { my @attachments_only; for my $locant (uniq find_locant_placing( $graph, $chain )) { my( $vertex ) = grep { $_->{number} == $chain->[$locant-1] } $graph->vertices; # Cycle through non-mainchain neighbours: for my $neighbour ($graph->neighbours( $vertex )) { next if any { $neighbour->{number} eq $_ } @$chain; # Find the name for a sidechain my $graph_copy = $graph->copy; $graph_copy->delete_edge( $vertex, $neighbour ); my $attachment_name = get_sidechain_name( $graph_copy, $vertex, $neighbour ); $attachment_name = "($attachment_name)" if $attachment_name =~ /^[0-9]/; push @attachments_only, $attachment_name; } } push @attachments, [clone( $chain ), \@attachments_only]; } } # All chains that have the same - alpabetically lowest attachments selected my( $best ) = sort { cmp_attachments( $a->[1], $b->[1] ) } @attachments; my @correct_chains_all = grep { join( ',', @{$_->[1]} ) eq join( ',', @{$best->[1]} ) } @attachments; return map { $_->[0] } @correct_chains_all; } # Returns array that contains numbers of vertices that are in main chain sub save_main_chain_vertices_in_array { my( $curr_vertex, $all_vertices, $structure ) = @_; if( $structure->{$curr_vertex}[0] == $curr_vertex ) { return $all_vertices; } else { push @$all_vertices, $structure->{$curr_vertex}[0]; return save_main_chain_vertices_in_array( $structure->{$curr_vertex}->[0], $all_vertices, $structure ); } } # Returns array that contains lengths of all side chains sub find_lengths_of_side_chains { my( $graph, $curr_vertex, $main_chain_vertices, $side_chain_lengths, $structure, $atoms_left ) = @_; return sort @$side_chain_lengths unless $atoms_left; my @vertices = $graph->vertices; my( $vertex ) = grep { $_->{number} == $curr_vertex } @vertices; my @curr_neighbours = $graph->neighbours( $vertex ); if( scalar @curr_neighbours == 1 ) { $graph->delete_vertex( $vertex ); $atoms_left--; find_lengths_of_side_chains( $graph, $curr_neighbours[0]->{number}, $main_chain_vertices, $side_chain_lengths, $structure, $atoms_left ); } else { my @side_chain_neighbours; my $next_chain_vertex; # Find all neighours of the chain that does not exist in main chain and # the next chain to be analyzed foreach my $neigh ( @curr_neighbours ) { if( any { $neigh->{number} eq $_ } @$main_chain_vertices ) { $next_chain_vertex = $neigh; } else { push @side_chain_neighbours, $neigh; } } $graph->delete_vertex( $vertex ); $atoms_left--; # For each side chain neighbour, find their chain lengths foreach my $neighbour ( @side_chain_neighbours ) { push @$side_chain_lengths, BFS_calculate_chain_length( $graph, $neighbour ); } find_lengths_of_side_chains( $graph, $next_chain_vertex->{number}, $main_chain_vertices, $side_chain_lengths, $structure, $atoms_left ); } } # TODO: Try to merge all subroutines # Find placings of all locants in the chain. # Returns an array of indices denoting locant placements. # This code treats vertices with degrees larger than 2 as having sidechain attachments. sub find_locant_placing { my( $graph, $main_chain ) = @_; # Indices received instead of vertices, transform them. # This later on should be removed. if( @$main_chain && !ref $main_chain->[0] ) { my %vertices_by_id = map { ( $_->{number} => $_ ) } $graph->vertices; $main_chain = [ map { $vertices_by_id{$_} } @$main_chain ]; } # Visit all attachments and memorize their attachment positions my @locants; for my $i (0..$#$main_chain) { my $vertex = $main_chain->[$i]; next unless $graph->degree( $vertex ) > 2; push @locants, ( $i ) x ( $graph->degree( $vertex ) - 2 ); } return @locants; } # Returns number of side chains sub find_number_of_side_chains { my( $graph, $main_chain, $structure ) = @_; # Code is destructive, need to make a copy before execution: $graph = $graph->copy; my @vertices = $graph->vertices; my $number_of_side_chains = 0; for my $curr_vertex ( reverse @$main_chain ) { my( $vertex ) = grep { $_->{number} == $curr_vertex } @vertices; my @curr_neighbours = $graph->neighbours( $vertex ); return $number_of_side_chains unless scalar @curr_neighbours; $graph->delete_vertex( $vertex ); if( scalar @curr_neighbours > 1 ) { foreach my $neigh (@curr_neighbours) { next if any { $neigh->{number} eq $_ } @$main_chain; $number_of_side_chains++; } } } } sub find_number_of_branched_side_chains { my( $graph, $main_chain, $structure ) = @_; # Code is destructive, need to make a copy before execution: $graph = $graph->copy; my @vertices = $graph->vertices; my $number_of_branched_side_chains = 0; for my $curr_vertex ( reverse @$main_chain ) { my( $vertex ) = grep {$_->{number} == $curr_vertex} @vertices; my @curr_neighbours = $graph->neighbours( $vertex ); return $number_of_branched_side_chains unless scalar @curr_neighbours; $graph->delete_vertex( $vertex ); if( scalar @curr_neighbours > 1 ) { foreach my $neigh (@curr_neighbours) { next if any { $neigh->{number} eq $_ } @$main_chain; $number_of_branched_side_chains += BFS_is_chain_branched( $graph, $neigh ); } } } } # Sorts given names only based on alphabetical part of the name sub cmp_attachments { my( $a, $b ) = @_; my @a = @{$a}; my @b = @{$b}; for (0..$#a) { my $a_alpha = $a[$_]; my $b_alpha = $b[$_]; my @A = ref $a_alpha eq 'ARRAY' ? sort @$a_alpha : ( $a_alpha ); my @B = ref $b_alpha eq 'ARRAY' ? sort @$b_alpha : ( $b_alpha ); for (0..min( scalar( @A ), scalar( @B ) )-1) { my $a_alpha = $A[$_]; my $b_alpha = $B[$_]; $a_alpha =~ s/[^a-zA-Z]+//g; $b_alpha =~ s/[^a-zA-Z]+//g; my $a_has_tert = $a_alpha =~ s/^tert(butyl)$/$1/; my $b_has_tert = $b_alpha =~ s/^tert(butyl)$/$1/; return $a_has_tert <=> $b_has_tert if $a_has_tert <=> $b_has_tert; return $a_alpha cmp $b_alpha if $a_alpha cmp $b_alpha; } } return 0; } # Sorts locant placings from lowest to largest # This had code identical to compare_side_chain_lengths(), thus calls to the latter have been redirected here. sub compare_locant_placings { my @A = @$a; my @B = @$b; if( @A >= 4 && ref $A[3] ) { # This is the "old" data structure @A = @{$A[3]}; @B = @{$B[3]}; } for (0..$#A) { return $A[$_] <=> $B[$_] if $A[$_] <=> $B[$_]; } return 0; } 1; ChemOnomatopist-0.10.0/lib/ChemOnomatopist/Util.pm000066400000000000000000000042741463750375500220610ustar00rootroot00000000000000package ChemOnomatopist::Util; # ABSTRACT: Generic utilities # VERSION use strict; use warnings; use Exporter; use Graph::Undirected; use List::Util qw( max min pairs ); use Scalar::Util qw( blessed ); use parent Exporter::; our @EXPORT_OK = qw( all_max all_min array_frequencies circle_permutations cmp_arrays copy zip ); sub all_max(&@) { my $code = shift; return () unless @_; my @values = map { $code->( $_ ) } @_; my $max = max @values; return map { $_[$_] } grep { $values[$_] == $max } 0..$#_; } sub all_min(&@) { my $code = shift; return () unless @_; my @values = map { $code->( $_ ) } @_; my $min = min @values; return map { $_[$_] } grep { $values[$_] == $min } 0..$#_; } sub array_frequencies(@) { my %frequencies; for (@_) { $frequencies{$_} = 0 unless exists $frequencies{$_}; $frequencies{$_}++; } return %frequencies; } sub circle_permutations(@) { my @permutations; for (0..$#_) { push @permutations, [ @_ ]; push @_, shift @_; } @_ = reverse @_; for (0..$#_) { push @permutations, [ @_ ]; push @_, shift @_; } return @permutations; } # Takes two arrays and compares them. # Comparison is first performed numerically on the corresponding array elements. # Then, if all corresponding elements are equal, arrays are compared by length. sub cmp_arrays($$) { my( $a, $b ) = @_; for (0..min( scalar( @$a ), scalar( @$b ) )-1) { return $a->[$_] <=> $b->[$_] if $a->[$_] <=> $b->[$_]; } return @$a <=> @$b; } sub copy($) { my( $object ) = @_; die "can only copy Graph now\n" unless blessed $object && $object->isa( Graph::Undirected:: ); # Graphs have to be copied with the following code as Graph::copy() loses attributes. my $graph = $object; my $copy = $graph->copy; for my $edge ($graph->edges) { next unless $graph->has_edge_attributes( @$edge ); $copy->set_edge_attributes( @$edge, $graph->get_edge_attributes( @$edge ) ); } return $copy; } sub zip(@) { die "odd input to zip\n" if @_ % 2; my $N = @_ / 2; return map { $_[$_], $_[$_ + $N] } 0..$N-1; } 1; ChemOnomatopist-0.10.0/lib/ChemOnomatopist/Util/000077500000000000000000000000001463750375500215145ustar00rootroot00000000000000ChemOnomatopist-0.10.0/lib/ChemOnomatopist/Util/Graph.pm000066400000000000000000000241051463750375500231150ustar00rootroot00000000000000package ChemOnomatopist::Util::Graph; # ABSTRACT: Generic graph utilities # VERSION use strict; use warnings; use ChemOnomatopist::Util qw( copy ); use Exporter; use Graph::Traversal::BFS; use List::Util qw( any sum0 ); use Set::Object qw( set ); use parent Exporter::; our @EXPORT_OK = qw( BFS_calculate_chain_length BFS_is_chain_branched cyclic_components graph_center graph_cycle_core graph_cycles graph_has_cycle graph_longest_paths graph_longest_paths_from_vertex graph_path_between_vertices graph_without_edge_attributes merge_graphs subgraph tree_branch_positions tree_number_of_branches ); # Calculates length of given graph (vertices count) sub BFS_calculate_chain_length { my( $graph, $start ) = @_; my $bfs = Graph::Traversal::BFS->new( $graph, start => $start ); return scalar $bfs->bfs; } # Returns 1 if there is any braches in the given graph and if there is none sub BFS_is_chain_branched { my( $graph, $start ) = @_; # FIXME: Not entirely sure why visited vertices are removed (A.M.) my $graph_copy = $graph->copy; my $branched = 0; my $bfs = Graph::Traversal::BFS->new( $graph, pre => sub { my @neighbours = $graph_copy->neighbours( $_[0] ); $branched = 1 if scalar @neighbours > 1; $graph_copy->delete_vertex( $_[0] ); }, start => $start ); $bfs->bfs; return $branched; } sub cyclic_components { my( $graph ) = @_; $graph = copy $graph; # Due to the issue in Graph, bridges() returns strings instead of real objects. # Graph issue: https://github.com/graphviz-perl/Graph/issues/29 # The code below works now buggy (< 0.9727) as well as fixed (>= 0.9727) versions. my %vertices_by_name = map { $_ => $_ } $graph->vertices; $graph->delete_edges( map { map { $vertices_by_name{$_} } @$_ } $graph->bridges ); $graph->delete_vertices( grep { !$graph->degree( $_ ) } $graph->vertices ); return () unless $graph->vertices; # No vertices = no cycles return map { subgraph( $graph, @$_ ) } $graph->connected_components; } # Find how many side attachments are at every position of the given path. sub graph_attachment_positions { my( $graph, @vertices ) = @_; $graph = $graph->copy; $graph->delete_path( @vertices ); return map { $graph->degree( $_ ) } @vertices; } # Finds center (or two centers) of a tree graph. # Returns one or two vertices constituting the center. sub graph_center { my( $graph ) = @_; $graph = $graph->copy; my $nvertices = scalar $graph->vertices; while( $graph->vertices > 2 ) { $graph->delete_vertices( grep { $graph->degree( $_ ) == 1 } $graph->vertices ); my $nvertices_now = scalar $graph->vertices; if( $nvertices_now == $nvertices ) { # Safeguard for cycles and/or isolated vertices die 'cannot find center of cyclic or isolated graphs'; } $nvertices = $nvertices_now; } return $graph->vertices; } # Iteratively removes leaves of a graph until cycle core remains. sub graph_cycle_core { my( $graph ) = @_; $graph = $graph->copy; while( my @leaves = grep { $graph->degree( $_ ) == 1 } $graph->vertices ) { $graph->delete_vertices( @leaves ); } return $graph; } # Decomposes bridgeless graph into cycles # FIXME: Experimental sub graph_cycles { my( $graph ) = @_; $graph = copy $graph; my @cycles; while( $graph->vertices ) { my $triple_connected = set( grep { $graph->degree( $_ ) == 3 } $graph->vertices ); if( !@$triple_connected ) { push @cycles, $graph; last; } my @chords = grep { $triple_connected->has( $_->[0] ) && $triple_connected->has( $_->[1] ) } $graph->edges; my $wo_chords = copy( $graph )->delete_vertices( @$triple_connected ); for my $component ($wo_chords->connected_components) { next if @$component == 1; my @ends = grep { $wo_chords->degree( $_ ) == 1 } @$component; # Should be two my( $chord ) = grep { ($graph->has_edge( $_->[0], $ends[0] ) && $graph->has_edge( $_->[1], $ends[1] )) || ($graph->has_edge( $_->[0], $ends[1] ) && $graph->has_edge( $_->[1], $ends[0] )) } @chords; next unless $chord; # Found a chord which completes a cycle my $vertices = set( @$component, @$chord ); my $cycle = copy( $graph )->delete_vertices( grep { !$vertices->has( $_ ) } $graph->vertices ); push @cycles, $cycle; $graph->delete_vertices( @$component ); } } return @cycles; } sub graph_has_cycle { my( $graph ) = @_; return '' unless $graph->vertices; return $graph->edges > $graph->vertices - 1; } # Finds longest paths in a tree graph. # The subroutine finds all longest paths originating at graph center(s) and produces all their combinations. # No two paths containing the same vertices are returned. sub graph_longest_paths { my( $graph ) = @_; my @centers = graph_center( $graph ); my @longest_paths; if( @centers == 1 ) { # Single-centered graph # Removing the center from longest path parts, to be added later my @longest_path_parts = map { [ @{$_}[1..$#$_] ] } graph_longest_paths_from_vertex( $graph, $centers[0] ); for my $i (0..$#longest_path_parts) { for my $j (0..$#longest_path_parts) { next if $i >= $j; # Ensure that two paths do not start at the same vertex. next if $longest_path_parts[$i]->[0] eq $longest_path_parts[$j]->[0]; push @longest_paths, [ reverse( @{$longest_path_parts[$i]} ), $centers[0], @{$longest_path_parts[$j]} ]; } } } else { # Double-centered graph $graph = $graph->copy; $graph->delete_edge( @centers ); my @longest_path_parts1 = graph_longest_paths_from_vertex( $graph, $centers[0] ); my @longest_path_parts2 = graph_longest_paths_from_vertex( $graph, $centers[1] ); for my $i (0..$#longest_path_parts1) { for my $j (0..$#longest_path_parts2) { push @longest_paths, [ reverse( @{$longest_path_parts1[$i]} ), @{$longest_path_parts2[$j]} ]; } } } return @longest_paths; } # Finds all the longest paths from given vertex sub graph_longest_paths_from_vertex { my( $graph, $vertex ) = @_; my %from = ( $vertex => undef ); my %length = ( $vertex => 0 ); my $bfs = Graph::Traversal::BFS->new( $graph, tree_edge => sub { my( $u, $v ) = @_; ( $u, $v ) = ( $v, $u ) if exists $from{$v}; $from{$v} = $u; $length{$v} = $length{$u} + 1; }, start => $vertex, ); $bfs->bfs; my @furthest_leaves; my $furthest_distance = 0; for my $vertex ( $graph->vertices ) { next unless exists $length{$vertex}; # May happen in disconnected graphs if( $length{$vertex} < $furthest_distance ) { next; } elsif( $length{$vertex} == $furthest_distance ) { push @furthest_leaves, $vertex; } else { @furthest_leaves = ( $vertex ); $furthest_distance = $length{$vertex}; } } # Backtrack starting from the furthest leaves to collect the longest # paths. In the returned result path, starting vertex is the first. my @longest_paths; for my $vertex ( @furthest_leaves ) { my @path; while( defined $vertex ) { push @path, $vertex; $vertex = $from{$vertex}; } push @longest_paths, [ reverse @path ]; } return @longest_paths; } sub graph_without_edge_attributes($) { my( $graph ) = @_; $graph = $graph->copy; for ($graph->edges) { $graph->delete_edge_attributes( @$_ ); } return $graph; } sub merge_graphs { my( $A, $B, $A_vertex, $B_vertex ) = @_; my $merged = copy $A; for my $edge ($B->edges) { $merged->add_edge( @$edge ); next unless $B->has_edge_attributes( @$edge ); $merged->set_edge_attributes( @$edge, $B->get_edge_attributes( @$edge ) ); } $merged->add_edge( $A_vertex, $B_vertex ) if $A_vertex && $B_vertex; return $merged; } sub subgraph { my( $graph, @vertices ) = @_; my $vertices = set( @vertices ); $graph = copy $graph; $graph->delete_vertices( grep { !$vertices->has( $_ ) } $graph->vertices ); return $graph; } # Given a tree and a path, finds the number of branches branching off the given path. # It is equal to the sum of all degrees minus the edges between vertices in a path. sub tree_number_of_branches { my( $tree, @vertices ) = @_; return sum0( map { $tree->degree( $_ ) } @vertices ) - 2 * (scalar @vertices - 1); } # Returns a list of 0-based indices of branch positions. sub tree_branch_positions { my( $tree, @vertices ) = @_; return map { ( $_ ) x ( $tree->degree( $vertices[$_] ) - 2 ) } grep { $tree->degree( $vertices[$_] ) > 2 } 0..$#vertices; } # Find a path between two vertices in an acyclic graph. sub graph_path_between_vertices { my( $graph, $A, $B ) = @_; if( graph_has_cycle( $graph ) ) { die "cannot call graph_path_between_vertices() on graph with cycles\n"; } $graph = $graph->copy; while( my @leaves = grep { $graph->degree( $_ ) == 1 && $_ != $A && $_ != $B } $graph->vertices ) { $graph->delete_vertices( @leaves ); } # No edges means that path does not exist between these two vertices return () unless $graph->edges; my @path; my $vertex = $A; while( $vertex ) { push @path, $vertex; my( $vertex_now ) = $graph->neighbours( $vertex ); $graph->delete_vertex( $vertex ); $vertex = $vertex_now; } return @path; } 1; ChemOnomatopist-0.10.0/lib/ChemOnomatopist/Util/SMILES.pm000066400000000000000000000026321463750375500230510ustar00rootroot00000000000000package ChemOnomatopist::Util::SMILES; use strict; use warnings; # ABSTRACT: SMILES utilities # VERSION use parent Exporter::; our @EXPORT_OK = qw( cycle_SMILES cycle_SMILES_explicit path_SMILES ); sub cycle_SMILES { my( $graph, @cycle ) = @_; my $SMILES = cycle_SMILES_explicit( $graph, @cycle ); $SMILES =~ s/-//g; return $SMILES; } sub cycle_SMILES_explicit { my( $graph, @cycle ) = @_; my $SMILES = ''; for my $i (0..$#cycle) { my $symbol = $cycle[$i]->{symbol}; $symbol = "[$symbol]" unless $symbol =~ /^[bcnosp]$/i || $symbol =~ /^(F|Cl|Br|I|\*)$/; $SMILES .= $symbol; if( $graph->has_edge_attribute( $cycle[$i], $cycle[($i+1) % scalar @cycle], 'bond' ) ) { $SMILES .= $graph->get_edge_attribute( $cycle[$i], $cycle[($i+1) % scalar @cycle], 'bond' ); } else { $SMILES .= '-'; } } return $SMILES; } sub path_SMILES { my( $graph, @path ) = @_; my $SMILES = ''; for my $i (0..$#path) { my $symbol = $path[$i]->{symbol}; $symbol = "[$symbol]" unless $symbol =~ /^[bcnosp]$/i || $symbol =~ /^(F|Cl|Br|I|\*)$/; $SMILES .= $symbol; next if $i == $#path; next unless $graph->has_edge_attribute( $path[$i], $path[$i+1], 'bond' ); $SMILES .= $graph->get_edge_attribute( $path[$i], $path[$i+1], 'bond' ); } return $SMILES; } 1; ChemOnomatopist-0.10.0/links.md000066400000000000000000000007541463750375500163700ustar00rootroot00000000000000To find the longest chain, longest path has to be established in the tree for tree compounds: * https://www.geeksforgeeks.org/longest-path-undirected-tree/ Story for the name of the project: * https://english.stackexchange.com/questions/430516/is-there-a-word-for-the-giver-of-names BSc thesis of Miglė Urbonaitė on ChemOnomatopist (in Lithuanian): * https://www.lvb.lt/permalink/f/8og6ah/ELABAETD146240727 Blue Book v2 (BBv2): * https://iupac.qmul.ac.uk/BlueBook/PDF/BlueBookV2.pdf ChemOnomatopist-0.10.0/scripts/000077500000000000000000000000001463750375500164075ustar00rootroot00000000000000ChemOnomatopist-0.10.0/scripts/make-test000077500000000000000000000022221463750375500202250ustar00rootroot00000000000000#!/usr/bin/perl use strict; use warnings; use File::Basename qw( basename ); use IPC::Run3; use Getopt::Long::Descriptive; my $basename = basename $0; my( $opt, $usage ) = describe_options( <<"END" . 'OPTIONS', USAGE $basename [] [] DESCRIPTION $basename prepares tests for ChemOnomatopist END [ 'author', 'mark output tests with AUTHOR flag' ], [ 'pubchem', 'treat the input as three-columned PubChem file' ], [], [ 'help', 'print usage message and exit', { shortcircuit => 1 } ], ); if( $opt->help ) { print $usage->text; exit; } my @lines = <>; for (@lines) { s/^\s+//; s/\s+$//; my( $smiles, $iupac, $source ); if( $opt->pubchem ) { ( my $id, $iupac, $smiles ) = split /\t/, $_; $source = "PubChem $id"; } else { my( $stderr ); $iupac = $_; run3 [ 'java', '-jar', '/usr/share/java/opsin.jar' ], \$iupac, \$smiles, \$stderr; $smiles =~ s/\n$//; } $iupac =~ s/'/\\'/g; print "{ smiles => '$smiles', iupac => '$iupac'"; print ', AUTHOR => 1' if $opt->author; print ' },'; print " # $source" if $source; print "\n"; } ChemOnomatopist-0.10.0/scripts/parse-molecular-graph000077500000000000000000000017141463750375500225320ustar00rootroot00000000000000#!/usr/bin/perl use strict; use warnings; use ChemOnomatopist; use ChemOnomatopist::MolecularGraph; use Chemistry::OpenSMILES::Parser; use List::Util qw( uniq ); use Scalar::Util qw( blessed ); local $, = "\t"; local $\ = "\n"; while( my $SMILES = <> ) { chomp $SMILES; eval { my $parser = Chemistry::OpenSMILES::Parser->new; my @graphs = map { ChemOnomatopist::MolecularGraph->new( $_ ) } $parser->parse( $SMILES ); die "separate molecular entities are not handled yet\n" if @graphs > 1; my $graph = shift @graphs; ChemOnomatopist::find_groups( $graph ); my @groups = sort { $a cmp $b } uniq map { blessed $_ } grep { blessed $_ } ( $graph->groups, $graph->vertices ); print $SMILES, join ' ', @groups; }; if( $@ ) { $@ =~ s/\n$//; print $SMILES, $@; } } ChemOnomatopist-0.10.0/t/000077500000000000000000000000001463750375500151635ustar00rootroot00000000000000ChemOnomatopist-0.10.0/t/01_alkane.t000066400000000000000000000104651463750375500171110ustar00rootroot00000000000000#!/usr/bin/perl use strict; use warnings; use ChemOnomatopist; use ChemOnomatopist::Old; use Test::More; my @cases = ( { smiles => 'CCCC', iupac => 'butane' }, { smiles => 'CCCCC', iupac => 'pentane' }, { smiles => 'CC(C)C', iupac => '2-methylpropane' }, { smiles => 'C1CCC1', iupac => 'cyclobutane' }, { smiles => 'c1ccccccccccccc1', iupac => 'cyclotetradecaheptaene' }, { smiles => 'CCCCCCCCCCC(C)CCCC', iupac => '5-methylpentadecane' }, { smiles => 'CC(C)CC(CCC(C)C)C', iupac => '2,4,7-trimethyloctane' }, { smiles => 'CCC(C)(C)CCCCCC(C)C', iupac => '2,8,8-trimethyldecane' }, { smiles => 'C(C)C(CCC(CCC(C)C)(C)C)C', iupac => '2,5,5,8-tetramethyldecane' }, { smiles => 'C(C)C(CC(C)C)CC', iupac => '4-ethyl-2-methylhexane' }, { smiles => 'C(C)C(C(CC)(C)C)CCC', iupac => '4-ethyl-3,3-dimethylheptane' }, { smiles => 'CCCC(CCC)(C(C)C)C(C)C', iupac => '4,4-di(propan-2-yl)heptane' }, { smiles => 'CCCC(CCC)C(C)C', iupac => '4-propan-2-ylheptane', AUTHOR => 1 }, # PubChem 142981 { smiles => 'CCCCC(CCCC)C(C)C(C)C', iupac => '5-(3-methylbutan-2-yl)nonane' }, { smiles => 'CCCCCC(CC(C)CC)CC(CCCCC)C(C)CCC', iupac => '6-(2-methylbutyl)-8-pentan-2-yltridecane', AUTHOR => 1 }, # PubChem 54207584 { smiles => 'CCCCCCCCCCCCCCCCCCCCCCCCCCCCCC(CCCCCCCCCC)CCCCCCCCCCCCC', iupac => '14-decyltritetracontane' }, { smiles => 'CCCCCCCC(CCCCCCC)(CCCCCCC)C(C)CC', iupac => '8-butan-2-yl-8-heptylpentadecane', AUTHOR => 1 }, # PubChem 154365261 { smiles => 'CCC(C)C(CC(C(C)CC)C(C)CC)C(C)CC', iupac => '4,6-di(butan-2-yl)-3,7-dimethylnonane' }, { smiles => 'CCCCCCCCCCCCC(CCC)(CCC)C(C(C)(C)C)C(CCCC)(CCCCCC)C(CC)(CCC)C(C)CCCC', iupac => '7-butyl-8-tert-butyl-6-ethyl-7-hexyl-5-methyl-6,9,9-tripropylhenicosane', AUTHOR => 1 }, { smiles => 'CC(CC(CC(CCCC)CC)CCCCCC)CC(C)C', iupac => '7-(2,4-dimethylpentyl)-5-ethyltridecane' }, # BBv2 P-14.5.2 { smiles => 'C(C)C(C(CCC)C)(C(CCCC)C)C', iupac => '5-ethyl-4,5,6-trimethyldecane' }, { smiles => 'C(C)C(C(CC)C)C(C(CCC)(C)C)(CC)CC', iupac => '4,5,5-triethyl-3,6,6-trimethylnonane' }, { smiles => 'C(C)C(C(C(CCC)C)(CCC)CCC)(CCCC)CCC', iupac => '6-ethyl-4-methyl-5,5,6-tripropyldecane' }, { smiles => 'C(C)C(C(CCC)(C)C)(C(C(CCC)(C)CC)CCC)CCC', iupac => '5,7-diethyl-4,4,7-trimethyl-5,6-dipropyldecane' }, { smiles => 'CCC(CC)C(C)C', iupac => '3-ethyl-2-methylpentane' }, { smiles => 'CCCCCCCCCCCCCCCCCCCCCCC', iupac => 'tricosane' }, { smiles => 'CC(CC(CCC)CCC)C', iupac => '2-methyl-4-propylheptane' }, { smiles => 'CC(CC(CC(CC(CC)CC)C)(CC(CC(CC)C)C)CC(CC(CC)C)C)CC(CC)C', iupac => '7,7-bis(2,4-dimethylhexyl)-3-ethyl-5,9,11-trimethyltridecane', AUTHOR => 1 }, # Fails 'old' nondeterministically on some machines { smiles => 'CC(C(CCC)C)C(CC(CCCC)CC)CCCCCC', iupac => '5-ethyl-7-(3-methylhexan-2-yl)tridecane' }, { smiles => 'CCC(CC)CCC(CCC(CC)CC)CCC(CCC(CCC(CC)CC)CCC(CC)CC)CCC(CCC(CC)CC)CCC(CC)CC', iupac => '3,15-diethyl-9-[6-ethyl-3-(3-ethylpentyl)octyl]-6,12-bis(3-ethylpentyl)heptadecane', AUTHOR => 1 }, # different order { smiles => 'CC(C)CC(CC(C)C)CC(CC(C)C)CC(C)C', iupac => '2,8-dimethyl-4,6-bis(2-methylpropyl)nonane' }, { smiles => 'CCCCCCCCCC(CCCC)(CCCC)C(C)(C)C', iupac => '5-butyl-5-tert-butyltetradecane' }, { smiles => 'CCCCC(CC)C(CCCC)C(C)(C)C', iupac => '5-ethyl-6-tert-butyldecane' }, { smiles => 'CCCCC(CCCC)CCCCCCCC(C(C)CCC)C(C)(C)C', iupac => '5-tert-butyl-13-butyl-4-methylheptadecane', AUTHOR => 1 }, { smiles => 'CC(CC)C(CCCC)(CCCCCCCCCCCCCCCCCCCCCCCCCC)CCCC', iupac => '5-(butan-2-yl)-5-butylhentriacontane' }, # BBv2 P-14.5.1 { smiles => 'CCCCCC(C)(C)C(C)CC(CC)CC(CCC(CC)CC)C(CCC(CC)CCC)(CC(CC)CCCC)C(CC)(CCCC(CC)CC)C(CC(C)C(CC)CC)(CC(C)(CC)CCC)C(C)C(CC(C)CC(C)CC)(CC(C)C(CC)CCC)CC(C)(CC)CCCC', iupac => '10,14-diethyl-11-(2-ethylhexyl)-11-(3-ethylhexyl)-10-(4-ethylhexyl)-7-(2-ethyl-2-methylhexyl)-7-(3-ethyl-2-methylhexyl)-9-(2-ethyl-2-methylpentyl)-9-(3-ethyl-2-methylpentyl)-12-(3-ethylpentyl)-3,5,8,16,17,17-hexamethyldocosane' }, ); @cases = grep { !exists $_->{AUTHOR} } @cases unless $ENV{AUTHOR_TESTING}; plan skip_all => 'No available cases' unless @cases; plan tests => 2 * @cases; for my $case (@cases) { my $name = $case->{iupac}; is ChemOnomatopist::get_name( $case->{smiles} ), $name, 'new'; is ChemOnomatopist::Old::get_name( $case->{smiles} ), $name, 'old'; } ChemOnomatopist-0.10.0/t/02_cod.t000066400000000000000000000036721463750375500164260ustar00rootroot00000000000000#!/usr/bin/perl use strict; use warnings; use ChemOnomatopist; use DBI; use IPC::Run3; use Test::More; my $OPSIN_JAR = '/usr/share/java/opsin.jar'; if( !$ENV{EXTENDED_TESTING} ) { plan skip_all => "Skip \$ENV{EXTENDED_TESTING} is not set\n"; } plan skip_all => "OPSIN is not installed\n" unless -e $OPSIN_JAR; my $dbh = db_connect( 'mysql', 'www.crystallography.net', 'cod', 3306, 'cod_reader', '' ); # FIXME: Skip tests if connection is unsuccessful. (A.M.) my $sth = $dbh->prepare( 'SELECT chemname, value AS smiles FROM data JOIN smiles ON file = cod_id WHERE chemname IS NOT NULL' ); $sth->execute; my %tests; while (my $item = $sth->fetchrow_hashref) { if ($item->{'smiles'} =~ /^[CchH\[\]\(\)\-\+]+$/) { $tests{$item->{'chemname'}} = $item->{'smiles'}; } } my %opsin_approved; for my $compound (keys %tests) { my( $smiles, $stderr ); run3 "java -jar $OPSIN_JAR", \"$compound\n", \$smiles, \$stderr; chomp $smiles; next unless $smiles; # Skipping names that were not understood by OPSIN if( $smiles eq $tests{$compound} ) { # Ensuring OPSIN approval $opsin_approved{$smiles} = $compound; } } plan tests => scalar keys %opsin_approved; for my $case (keys %opsin_approved) { # FIXME: Chemical name may have initial letter uppercased, but it may # not be the right choice to lowercase it before comparison. # Need to think a bit more on how to deal with it. (A.M.) is( ChemOnomatopist::get_name( $case ), $opsin_approved{$case} ); } sub db_connect { my ($db_platform, $db_host, $db_name, $db_port, $db_user, $db_pass) = @_; my $dsn = "dbi:$db_platform:hostname=$db_host;dbname=$db_name;" . "user=$db_user;password=$db_pass"; my $options = { PrintError => 0, mysql_enable_utf8 => 1 }; my $dbh = DBI->connect( $dsn, $db_user, $db_pass, $options ); die 'could not connect to the database - ' . lcfirst( $DBI::errstr ) unless $dbh; return $dbh } ChemOnomatopist-0.10.0/t/04_longest_paths.t000066400000000000000000000107761463750375500205400ustar00rootroot00000000000000#!/usr/bin/perl use strict; use warnings; use ChemOnomatopist; use ChemOnomatopist::Chain::FromHalves; use ChemOnomatopist::ChainHalf; use ChemOnomatopist::Util::Graph qw( graph_longest_paths graph_longest_paths_from_vertex ); use Graph::Undirected; use Test::More; sub chains_odd($@) { my( $graph, @halves ) = @_; @halves = map { ChemOnomatopist::ChainHalf->new( $graph, undef, @$_ ) } @halves; my @chains; for my $half1 (@halves) { for my $half2 (@halves) { next if $half1 == $half2; push @chains, ChemOnomatopist::Chain::FromHalves->new( $half1, $half2 ); } } return @chains; } sub chains_even($@) { my( $graph, @halves ) = @_; @halves = map { ChemOnomatopist::ChainHalf->new( $graph, shift @$_, @$_ ) } @halves; my @chains; for my $half1 (@halves) { for my $half2 (@halves) { next if $half1->group eq $half2->group; push @chains, ChemOnomatopist::Chain::FromHalves->new( $half1, $half2 ); } } return @chains; } plan tests => 12; my $graph; my @paths; $graph = Graph::Undirected->new; for (1..10) { $graph->add_edge( 0, $_ ); } is( scalar graph_longest_paths_from_vertex( $graph, 0 ), 10 ); is( scalar graph_longest_paths( $graph ), 45 ); $graph->add_edge( 1, 11 ); is( scalar graph_longest_paths_from_vertex( $graph, 0 ), 1 ); is( scalar graph_longest_paths( $graph ), 9 ); # Elongated X-shaped graph with an odd-numbered longest path $graph = Graph::Undirected->new; $graph->add_path( 'A'..'C' ); $graph->add_edge( 'A', 'A1' ); $graph->add_edge( 'A', 'A2' ); $graph->add_edge( 'C', 'C1' ); $graph->add_edge( 'C', 'C2' ); is scalar graph_longest_paths( $graph ), 4; @paths = ChemOnomatopist::rule_greatest_number_of_side_chains( chains_odd $graph, [ 'B', 'A', 'A1' ], [ 'B', 'C', 'C1' ] ); is scalar @paths, 2; @paths = ChemOnomatopist::rule_greatest_number_of_side_chains( chains_odd $graph, [ 'B', 'A', 'A1' ], [ 'B', 'A', 'A2' ], [ 'B', 'C', 'C1' ] ); is scalar @paths, 6; # Elongated X-shaped graph with an even-numbered longest path $graph = Graph::Undirected->new; $graph->add_path( 'A'..'D' ); $graph->add_edge( 'A', 'A1' ); $graph->add_edge( 'A', 'A2' ); $graph->add_edge( 'D', 'D1' ); $graph->add_edge( 'D', 'D2' ); is scalar graph_longest_paths( $graph ), 4; @paths = ChemOnomatopist::rule_greatest_number_of_side_chains( chains_even $graph, [ 'C', 'B', 'A', 'A1' ], [ 'B', 'C', 'D', 'D1' ] ), is scalar @paths, 2; @paths = ChemOnomatopist::rule_greatest_number_of_side_chains( chains_even $graph, [ 'C', 'B', 'A', 'A1' ], [ 'C', 'B', 'A', 'A2' ], [ 'B', 'C', 'D', 'D1' ] ); is scalar @paths, 4; # Elongated Y-shaped graph with an even-numbered longest path $graph = Graph::Undirected->new; $graph->add_path( 'AA1'..'AA5' ); $graph->add_path( 'AB1'..'AB5' ); $graph->add_path( 'BA1'..'BA5' ); $graph->add_edge( 'A', 'AA1' ); $graph->add_edge( 'A', 'AB1' ); $graph->add_edge( 'B', 'BA1' ); $graph->add_edge( 'A', 'B' ); @paths = ChemOnomatopist::rule_greatest_number_of_side_chains( chains_even $graph, [ 'B', 'A', 'AA1'..'AA5' ], [ 'B', 'A', 'AB1'..'AB5' ], [ 'A', 'B', 'BA1'..'BA5' ] ); is scalar @paths, 4; $graph->add_path( 'AA2', 'branch' ); @paths = ChemOnomatopist::rule_greatest_number_of_side_chains( chains_even $graph, [ 'B', 'A', 'AA1'..'AA5' ], [ 'B', 'A', 'AB1'..'AB5' ], [ 'A', 'B', 'BA1'..'BA5' ] ); is scalar @paths, 2; ChemOnomatopist-0.10.0/t/05_numbers.t000066400000000000000000000102741463750375500173330ustar00rootroot00000000000000#!/usr/bin/perl use strict; use warnings; use ChemOnomatopist; use Test::More; # Collected by Miglė my @prefixes = qw( meth eth prop but pent hex hept oct non dec undec dodec tridec tetradec pentadec hexadec heptadec octadec nonadec icos henicos docos tricos tetracos pentacos hexacos heptacos octacos nonacos triacont hentriacont dotriacont tritriacont tetratriacont pentatriacont hexatriacont heptatriacont octatriacont nonatriacont tetracont hentetracont dotetracont tritetracont tetratetracont pentatetracont hexatetracont heptatetracont octatetracont nonatetracont pentacont henpentacont dopentacont tripentacont tetrapentacont pentapentacont hexapentacont heptapentacont octapentacont nonapentacont hexacont henhexacont dohexacont trihexacont tetrahexacont pentahexacont hexahexacont heptahexacont octahexacont nonahexacont heptacont henheptacont doheptacont triheptacont tetraheptacont pentaheptacont hexaheptacont heptaheptacont octaheptacont nonaheptacont octacont henoctacont dooctacont trioctacont tetraoctacont pentaoctacont hexaoctacont heptaoctacont octaoctacont nonaoctacont nonacont hennonacont dononacont trinonacont tetranonacont pentanonacont hexanonacont heptanonacont octanonacont nonanonacont hect henhect dohect trihect tetrahect pentahect hexahect heptahect octahect nonahect decahect undecahect dodecahect tridecahect tetradecahect pentadecahect hexadecahect heptadecahect octadecahect nonadecahect icosahect henicosahect docosahect tricosahect tetracosahect pentacosahect hexacosahect heptacosahect octacosahect nonacosahect triacontahect hentriacontahect dotriacontahect tritriacontahect tetratriacontahect pentatriacontahect hexatriacontahect heptatriacontahect octatriacontahect nonatriacontahect tetracontahect hentetracontahect dotetracontahect tritetracontahect tetratetracontahect pentatetracontahect hexatetracontahect heptatetracontahect octatetracontahect nonatetracontahect pentacontahect henpentacontahect dopentacontahect tripentacontahect tetrapentacontahect pentapentacontahect hexapentacontahect heptapentacontahect octapentacontahect nonapentacontahect hexacontahect henhexacontahect dohexacontahect trihexacontahect tetrahexacontahect pentahexacontahect hexahexacontahect heptahexacontahect octahexacontahect nonahexacontahect heptacontahect henheptacontahect doheptacontahect triheptacontahect tetraheptacontahect pentaheptacontahect hexaheptacontahect heptaheptacontahect octaheptacontahect nonaheptacontahect octacontahect henoctacontahect dooctacontahect trioctacontahect tetraoctacontahect pentaoctacontahect hexaoctacontahect heptaoctacontahect octaoctacontahect nonaoctacontahect nonacontahect hennonacontahect dononacontahect trinonacontahect tetranonacontahect pentanonacontahect hexanonacontahect heptanonacontahect octanonacontahect nonanonacontahect ); my %cases = ( # Taken from: # https://en.wikipedia.org/w/index.php?title=IUPAC_numerical_multiplier&oldid=1086173027 241 => 'hentetracontadict', 411 => 'undecatetract', 548 => 'octatetracontapentact', 9267 => 'heptahexacontadictanonali', # Taken from BBv2 P-14.2.1.2 363 => 'trihexacontatrict', 486 => 'hexaoctacontatetract', ); plan tests => scalar( @prefixes ) + scalar keys %cases; for (0..$#prefixes) { is( ChemOnomatopist::alkane_chain_name( $_ + 1 ), $prefixes[$_], "Number $_" ); } for (sort keys %cases) { is( ChemOnomatopist::alkane_chain_name( $_ ), $cases{$_}, "Number $_" ); } ChemOnomatopist-0.10.0/t/06_unsupported.t000066400000000000000000000021261463750375500202460ustar00rootroot00000000000000#!/usr/bin/perl use strict; use warnings; use ChemOnomatopist; use Test::More; my %unsupported = ( # 'N1NNN1' => 'cannot handle complicated monocycles for now', # FIXME: This is not supported # BBv3 P-52.2.4.1 'C12=CC=CC=C2C1' => 'bicycles with three and four-membered cycles are not supported yet', 'C12=CC=CC=C2C=C1' => 'bicycles with three and four-membered cycles are not supported yet', 'C1=CC=C2C(=C1)C=CC3=C2N=CC=C3' => 'cannot handle complicated cyclic compounds', # PubChem 9191 'C1=CC=CC=2C1=C1C=C3C=C4C=CC=CC4=CC3=CC1=CC2' => 'cannot handle complicated cyclic compounds', # PubChem 67470 'C1=CC2=C3C=CC(=CC=C4C=CC(=C5C=CC(=CC=C1C=C2)C=C5)C=C4)C=C3' => 'cannot handle complicated cyclic compounds', # PubChem 157100544 'P1(Cl)Oc2c(cc3ccccc3c2)C(=O)O1' => 'cannot name xanthene derivatives', # COD 2201535 ); plan tests => scalar keys %unsupported; for my $case (sort keys %unsupported) { my $message; eval { ChemOnomatopist::get_name( $case ) }; $message = $@ if $@; $message =~ s/\n$// if $message; is( $message, $unsupported{$case} ); } ChemOnomatopist-0.10.0/t/07_trees.t000066400000000000000000000006011463750375500167750ustar00rootroot00000000000000#!/usr/bin/perl use strict; use warnings; use ChemOnomatopist::Util::Graph qw( tree_number_of_branches ); use Graph::Undirected; use Test::More; plan tests => 3; my $graph = Graph::Undirected->new; $graph->add_path( 'A'..'E' ); is tree_number_of_branches( $graph, 'A'..'E' ), 0; is tree_number_of_branches( $graph, 'A'..'D' ), 1; is tree_number_of_branches( $graph, 'B'..'D' ), 2; ChemOnomatopist-0.10.0/t/08_locants.t000066400000000000000000000045111463750375500173230ustar00rootroot00000000000000#!/usr/bin/perl use strict; use warnings; use ChemOnomatopist; use ChemOnomatopist::Chain::FromHalves; use ChemOnomatopist::ChainHalf; use ChemOnomatopist::MolecularGraph; use Test::More; sub chains($@) { my( $graph, @halves ) = @_; @halves = map { ChemOnomatopist::ChainHalf->new( $graph, undef, @$_ ) } @halves; my @chains; for my $half1 (@halves) { for my $half2 (@halves) { next if $half1 == $half2; push @chains, ChemOnomatopist::Chain::FromHalves->new( $half1, $half2 ); } } return @chains; } plan tests => 4; my $graph; my @paths; my @atoms = map { { symbol => 'C', number => $_ } } 0..38; $graph = ChemOnomatopist::MolecularGraph->new; $graph->add_path( map { $atoms[$_] } ( 1, 0, 11..17 ) ); $graph->add_path( map { $atoms[$_] } ( 1, 22..27 ) ); $graph->add_path( map { $atoms[$_] } ( 1, 32..37 ) ); $graph->add_edge( map { $atoms[$_] } ( 11, 18 ) ); $graph->add_edge( map { $atoms[$_] } ( 25, 28 ) ); $graph->add_edge( map { $atoms[$_] } ( 32, 38 ) ); @paths = ChemOnomatopist::rule_lowest_numbered_locants( chains $graph, [ map { $atoms[$_] } ( 0, 11..17 ) ], [ map { $atoms[$_] } ( 0, 1, 22..27 ) ], [ map { $atoms[$_] } ( 0, 1, 32..37 ) ] ); is scalar( @paths ), 1; is join( ',', map { $_->{number} } $paths[0]->vertices ), '27,26,25,24,23,22,1,0,11,12,13,14,15,16,17'; my $chain = ChemOnomatopist::select_mainchain( $graph ); is join( ',', map { $_->{number} } $chain->vertices ), '27,26,25,24,23,22,1,0,11,12,13,14,15,16,17'; # Figure 7 from Urbonaitė, 2022. # In the image, however, one branch is held as having priority over another, while in fact they are equal. $graph = Graph::Undirected->new; $graph->add_path( 'A'..'G' ); $graph->add_path( 'B', 'H' ); $graph->add_path( 'D', 'I'..'K' ); $graph->add_path( 'J', 'L' ); $graph->add_path( 'E', 'M' ); @paths = ChemOnomatopist::rule_lowest_numbered_locants( chains $graph, [ reverse 'A'..'D' ], [ 'D'..'G' ], [ 'D', 'I'..'K' ] ); is scalar( @paths ), 2; ChemOnomatopist-0.10.0/t/09_sidechain_carbons.t000066400000000000000000000025721463750375500213240ustar00rootroot00000000000000#!/usr/bin/perl use strict; use warnings; use Algorithm::Combinatorics qw(combinations); use ChemOnomatopist; use ChemOnomatopist::Chain::FromHalves; use ChemOnomatopist::ChainHalf; use Graph::Undirected; use Test::More; sub chain($@) { my( $graph, @vertices ) = @_; return ChemOnomatopist::ChainHalf->new( $graph, $vertices[1], @vertices ); } plan tests => 2; my $graph; my @chains; $graph = Graph::Undirected->new; $graph->add_path( 0, 11..15 ); $graph->add_path( 0, 21..25 ); $graph->add_path( 0, 31..35 ); $graph->add_path( 0, 41..45 ); $graph->add_path( 12, 16 ); $graph->add_path( 22, 26, 27 ); @chains = map { ChemOnomatopist::Chain::FromHalves->new( @$_ ) } grep { $_->[0] ne $_->[1] } combinations [ chain( $graph, 0, 11..15 ), chain( $graph, 0, 21..25 ), chain( $graph, 0, 31..35 ), chain( $graph, 0, 41..45 ) ], 2; @chains = ChemOnomatopist::rule_most_carbon_in_side_chains( @chains ); is scalar( @chains ), 6; @chains = map { ChemOnomatopist::Chain::FromHalves->new( @$_ ) } grep { $_->[0] ne $_->[1] } combinations [ chain( $graph, 0, 11..15 ), chain( $graph, 0, 31..35 ), chain( $graph, 0, 41..45 ) ], 2; @chains = ChemOnomatopist::rule_most_carbon_in_side_chains( @chains ); is scalar( @chains ), 3; ChemOnomatopist-0.10.0/t/10_locants.t000066400000000000000000000025071463750375500173170ustar00rootroot00000000000000#!/usr/bin/perl use strict; use warnings; use ChemOnomatopist; use ChemOnomatopist::Chain::FromHalves; use ChemOnomatopist::ChainHalf; use Graph::Undirected; use Test::More; sub chain { my( $graph, $other_center, @vertices ) = @_; return ChemOnomatopist::ChainHalf->new( $graph, $other_center, @vertices ); } plan tests => 4; my @atoms = map { { symbol => 'C', number => $_ } } 0..99; # A graph representing octane my $graph = Graph::Undirected->new( refvertexed => 1 ); $graph->add_path( map { $atoms[$_] } ( 1..8 ) ); my( $A, $B ); $A = chain( $graph, map { $atoms[$_] } ( 5, reverse 1..4 ) ); $B = chain( $graph, map { $atoms[$_] } ( 4, 5..8 ) ); is( ChemOnomatopist::Chain::FromHalves->new( $A, $B )->branch_positions, 0 ); is( ChemOnomatopist::Chain::FromHalves->new( $B, $A )->branch_positions, 0 ); # The following additions transform the graph to 2,4,7-trimethyloctane $graph->add_edge( map { $atoms[$_] } ( 2, 12 ) ); $graph->add_edge( map { $atoms[$_] } ( 4, 42 ) ); $graph->add_edge( map { $atoms[$_] } ( 7, 72 ) ); $A = chain( $graph, map { $atoms[$_] } ( 5, reverse 1..4 ) ); $B = chain( $graph, map { $atoms[$_] } ( 4, 5..8 ) ); is join( ',', ChemOnomatopist::Chain::FromHalves->new( $A, $B )->branch_positions ), '1,3,6'; is join( ',', ChemOnomatopist::Chain::FromHalves->new( $B, $A )->branch_positions ), '1,4,6'; ChemOnomatopist-0.10.0/t/11_ketones.t000066400000000000000000000015531463750375500173250ustar00rootroot00000000000000#!/usr/bin/perl use strict; use warnings; use ChemOnomatopist; use Test::More; my %SMILES_cases = ( # 'CCCC=O' => 'butan-1-one', # FIXME 'CC(=O)CC' => 'butan-2-one', 'CCCC(=O)CCCC(=O)CCCC(=O)CCCC(=O)C' => 'heptadecane-2,6,10,14-tetrone', 'CC(C)C(=O)CC(=O)C' => '5-methylhexane-2,4-dione', 'CC(C)CC(=O)C(CCC(=O)C)C(=O)CC(C)C' => '8-methyl-5-(3-methyl-1-oxobutyl)nonane-2,6-dione', # PubChem has '8-methyl-5-(3-methylbutanoyl)nonane-2,6-dione' # From BBv2 P-64.6.1 'CC(=S)CC' => 'butane-2-thione', 'CCC(CCC)=[Se]' => 'hexane-3-selone', 'CC(CC(C)=S)=S' => 'pentane-2,4-dithione', 'OC(C)C(C(C(C(C)O)=C)=O)=C' => '2,6-dihydroxy-3,5-dimethylideneheptan-4-one', # BBv2 P-64.7.1 ); plan tests => scalar( keys %SMILES_cases ); for my $case (sort keys %SMILES_cases) { is ChemOnomatopist::get_name( $case ), $SMILES_cases{$case}; } ChemOnomatopist-0.10.0/t/12_hydroxy.t000066400000000000000000000035401463750375500173620ustar00rootroot00000000000000#!/usr/bin/perl use strict; use warnings; use ChemOnomatopist; use Test::More; my @cases = ( { smiles => 'OCC(CO)(CO)CO', iupac => '2,2-bis(hydroxymethyl)propane-1,3-diol' }, { smiles => 'C1(CCCCC1)CCO', iupac => '2-cyclohexylethan-1-ol' }, # From BBv2 P-13.5.2 { smiles => 'C1(CCCCC1)CO', iupac => 'cyclohexylmethanol' }, # From BBv2 P-15.6.1.1 { smiles => 'ClCCCCC(CCO)C(C)O', iupac => '3-(4-chlorobutyl)pentane-1,4-diol' }, # From BBv2 P-44.1.1 { smiles => 'CC(C)(C)O', iupac => '2-methylpropan-2-ol' }, # From BBv2 P-63.1.2 { smiles => 'ON1CC(CCC1)C#N', iupac => '1-hydroxypiperidine-3-carbonitrile' }, # From BBv2 P-63.1.4 { smiles => 'ClCCC(CC(C)O)O', iupac => '6-chlorohexane-2,4-diol' }, # From BBv3 P-92.1.4.1 # From BBv3 P-63.1.5 { smiles => 'CC(C)S', iupac => 'propane-2-thiol' }, { smiles => 'C(C)[SeH]', iupac => 'ethaneselenol' }, { smiles => 'SCCCCS', iupac => 'butane-1,4-dithiol' }, { smiles => 'SC1=CC=CC=C1', iupac => 'benzenethiol' }, { smiles => 'SCCC(=O)O', iupac => '3-sulfanylpropanoic acid' }, { smiles => 'SC1=C(C=CC=C1)O', iupac => '2-sulfanylphenol' }, { smiles => 'SC1=CC=C(C=C1)SSC=1C=C(C=CC1)S', iupac => '3-[(4-sulfanylphenyl)disulfanyl]benzene-1-thiol' }, { smiles => 'OC(CS)C1CCC(C(C1)O)S', iupac => '5-(1-hydroxy-2-sulfanylethyl)-2-sulfanylcyclohexan-1-ol' }, { smiles => 'SC(CC(=O)O)CS', iupac => '3,4-bis(sulfanyl)butanoic acid' }, ); @cases = grep { !exists $_->{AUTHOR} } @cases unless $ENV{AUTHOR_TESTING}; plan skip_all => 'No available cases' unless @cases; plan tests => scalar @cases; for my $case (@cases) { my $ok; eval { $ok = is ChemOnomatopist::get_name( $case->{smiles} ), $case->{iupac}, $case->{smiles} }; $@ =~ s/\n$// if $@; fail $case->{smiles} . ": $@" if $@; diag 'test supposed to fail with AUTHOR_TESTING' if $case->{AUTHOR} && $ok; } ChemOnomatopist-0.10.0/t/13_multigroup.t000066400000000000000000000030611463750375500200620ustar00rootroot00000000000000#!/usr/bin/perl use strict; use warnings; use ChemOnomatopist; use Test::More; my @cases = ( { smiles => 'CC(=O)CC(=O)CCCO', iupac => '7-hydroxyheptane-2,4-dione' }, { smiles => 'CC(C)C(CCC(C)O)CCC(=O)C', iupac => '8-hydroxy-5-propan-2-ylnonan-2-one', AUTHOR => 1 }, # PubChem 537619 { smiles => 'C(O)OO', iupac => 'hydroperoxymethanol' }, { smiles => 'OOCC(=O)C', iupac => '1-hydroperoxypropan-2-one' }, # Unchecked # From BBv2 P-65.1.2.4 { smiles => 'CC(=O)CCCC(=O)O', iupac => '5-oxohexanoic acid' }, { smiles => 'C(=O)(O)C(O)C(C(=O)O)C(=O)C(=O)O', iupac => '1-hydroxy-3-oxopropane-1,2,3-tricarboxylic acid' }, { smiles => 'O=CCCC(=O)O', iupac => '4-oxobutanoic acid', AUTHOR => 1 }, # BBv2 P-66.6.1.3 - FIXME { smiles => 'CCCCCCCCC(C=O)C(CC)O', iupac => '2-(1-hydroxypropyl)decanal' }, { smiles => 'S=C(CC(=O)O)C', iupac => '3-sulfanylidenebutanoic acid' }, { smiles => '[N+](=O)([O-])C', iupac => 'nitromethane' }, # BBv2 P-61.5.1 { smiles => 'I(=O)(=O)(=O)CCC', iupac => 'periodylpropane', AUTHOR => 1 }, { smiles => 'I(=O)(=O)(=O)C(C)C', iupac => '2-periodylpropane' }, ); @cases = grep { !exists $_->{AUTHOR} } @cases unless $ENV{AUTHOR_TESTING}; plan skip_all => 'No available cases' unless @cases; plan tests => scalar @cases; for my $case (@cases) { my $ok; eval { $ok = is ChemOnomatopist::get_name( $case->{smiles} ), $case->{iupac}, $case->{smiles} }; $@ =~ s/\n$// if $@; fail $case->{smiles} . ": $@" if $@; diag 'test supposed to fail with AUTHOR_TESTING' if $case->{AUTHOR} && $ok; } ChemOnomatopist-0.10.0/t/14_carboxylic_acids.t000066400000000000000000000101671463750375500211630ustar00rootroot00000000000000#!/usr/bin/perl use strict; use warnings; use ChemOnomatopist; use Test::More; my @cases = ( # From BBv2 P-65.1.2 { smiles => 'CCCC(=O)O', iupac => 'butanoic acid' }, { smiles => 'OC(=O)CCCCCCCCCCC(=O)O', iupac => 'dodecanedioic acid' }, # From BBv2 P-65.1.2.2.1 { smiles => 'OC(=O)CCC(C(=O)O)CCC(=O)O', iupac => 'pentane-1,3,5-tricarboxylic acid' }, { smiles => 'C(C(=O)O)(C(=O)O)C(C(=O)O)(C(=O)O)', iupac => 'ethane-1,1,2,2-tetracarboxylic acid' }, # From BBv3 P-65.1.2.2.3 { smiles => 'C(=O)(O)CC(CC(=O)O)CCCC(=O)O', iupac => '3-(carboxymethyl)heptanedioic acid', AUTHOR => 1 }, # From BBv3 P-65.1.5.1 { smiles => 'C(CCCCC)(O)=S', iupac => 'hexanethioic O-acid' }, { smiles => 'C(CCCCC)(=[Se])S', iupac => 'hexaneselenothioic S-acid' }, { smiles => 'C(CCCCC(=S)S)(=S)S', iupac => 'hexanebis(dithioic acid)' }, { smiles => 'N1(CCCCC1)C(=S)S', iupac => 'piperidine-1-carbodithioic acid' }, { smiles => 'C1CCCCC1C(=S)[SeH]', iupac => 'cyclohexanecarboselenothioic Se-acid' }, { smiles => 'C(C)(=S)C1=CC=C(C(=O)O)C=C1', iupac => '4-(ethanethioyl)benzoic acid', AUTHOR => 1 }, { smiles => 'O=C(CCC(=O)O)S', iupac => '4-oxo-4-sulfanylbutanoic acid', AUTHOR => 1 }, { smiles => 'OC(CCC(=O)O)=S', iupac => '4-hydroxy-4-sulfanylidenebutanoic acid', AUTHOR => 1 }, { smiles => 'O=C(C(=O)O)S', iupac => 'oxo(sulfanyl)acetic acid', AUTHOR => 1 }, { smiles => 'OC(=S)C1=CC(=NC=C1)C(=O)O', iupac => '4-(hydroxycarbonothioyl)pyridine-2-carboxylic acid' }, { smiles => 'SC(=O)C1=CC(=NC=C1)C(=O)O', iupac => '4-(sulfanylcarbonyl)pyridine-2-carboxylic acid' }, { smiles => 'C(CC)(=N)S', iupac => 'propanimidothioic acid', AUTHOR => 1 }, { smiles => 'C(CCC)(=NN)[SeH]', iupac => 'butanehydrazonoselenoic acid', AUTHOR => 1 }, { smiles => 'SN=C(O)C1CCCC1', iupac => 'N-sulfanylcyclopentanecarboximidic acid', AUTHOR => 1 }, { smiles => 'ON=C([SeH])C1CCCCC1', iupac => 'N-hydroxycyclohexanecarboximidoselenoic acid', AUTHOR => 1 }, { smiles => 'NC(=CC(=S)S)SCC', iupac => '3-amino-3-(ethylsulfanyl)prop-2-ene(dithioic acid)', AUTHOR => 1 }, # From BBv2 P-65.1.5.2 { smiles => 'C(C)(O)=S', iupac => 'ethanethioic O-acid' }, { smiles => 'C(S)=O', iupac => 'methanethioic S-acid', AUTHOR => 1 }, { smiles => 'O=C(CCC(=O)O)S', iupac => '4-oxo-4-sulfanylbutanoic acid', AUTHOR => 1 }, { smiles => 'OC(C(=O)O)=S', iupac => 'hydroxy(sulfanylidene)acetic acid', AUTHOR => 1 }, { smiles => '[SeH]C(=O)C1=CC=C(C(=O)O)C=C1', iupac => '4-(selanylcarbonyl)benzoic acid' }, { smiles => 'C=1(C(=CC=CC1)C(=S)S)C(=S)S', iupac => 'benzene-1,2-dicarbodithioic acid' }, { smiles => 'C(C(=S)S)(=S)S', iupac => 'ethanebis(dithioic acid)' }, # From BBv3 P-65.1.5.3 { smiles => 'CC(=O)OS', iupac => 'ethane(thioperoxoic) OS-acid', AUTHOR => 1 }, { smiles => 'c1ccccc1C(=O)SO', iupac => 'benzenecarbo(thioperoxoic) SO-acid' }, { smiles => 'C1=C(C=CC2=CC=CC=C12)C(OO)=S', iupac => 'naphthalene-2-carboperoxothioic acid' }, { smiles => 'S=C(CCC(=O)O)OS', iupac => '4-sulfanylidene-4-(sulfanyloxy)butanoic acid', AUTHOR => 1 }, { smiles => 'C(=S)(OS)CCC(=O)O', iupac => '3-(dithiocarbonoperoxoyl)propanoic acid', AUTHOR => 1 }, { smiles => 'C(=S)(OS)C(=O)O', iupac => '(dithiocarbonoperoxoyl)formic acid', AUTHOR => 1 }, { smiles => 'OSC(C(=O)O)=O', iupac => '(hydroxysulfanyl)oxoacetic acid', AUTHOR => 1 }, { smiles => 'OSC(=O)C1CCC(CC1)C(=O)O', iupac => '4-[(hydroxysulfanyl)carbonyl]cyclohexanecarboxylic acid', AUTHOR => 1 }, { smiles => 'C(=O)O', iupac => 'formic acid', AUTHOR => 1 }, { smiles => 'C1=CC2=C(C=C1C(=O)O)SC(=N2)C(F)F', iupac => '2-(difluoromethyl)-1,3-benzothiazole-6-carboxylic acid' }, # PubChem 84692459 ); @cases = grep { !exists $_->{AUTHOR} } @cases unless $ENV{AUTHOR_TESTING}; plan skip_all => 'No available cases' unless @cases; plan tests => scalar @cases; for my $case (@cases) { my $ok; eval { $ok = is ChemOnomatopist::get_name( $case->{smiles} ), $case->{iupac}, $case->{smiles} }; $@ =~ s/\n$// if $@; fail $case->{smiles} . ": $@" if $@; diag 'test supposed to fail with AUTHOR_TESTING' if $case->{AUTHOR} && $ok; } ChemOnomatopist-0.10.0/t/15_aldehydes.t000066400000000000000000000040601463750375500176170ustar00rootroot00000000000000#!/usr/bin/perl use strict; use warnings; use ChemOnomatopist; use Test::More; my @cases = ( { smiles => 'O=CCCCCCCC1CC(CCC1)C=O', iupac => '3-(7-oxoheptyl)cyclohexane-1-carbaldehyde', AUTHOR => 1 }, # From BBv3 P-59.2.1.7 # From BBv2 P-66.6.1.1.1 { smiles => 'CCCCC=O', iupac => 'pentanal' }, { smiles => 'O=CCCCC=O', iupac => 'pentanedial' }, { smiles => 'O=CCC(C=O)CCC=O', iupac => 'butane-1,2,4-tricarbaldehyde' }, # From BBv2 P-66.6.1.1.2 { smiles => 'CN(N=NN(C)C)C=O', iupac => '1,4,4-trimethyltetraaz-2-ene-1-carbaldehyde', AUTHOR => 1 }, # From BBv2 P-66.6.1.1.3 # From BBv2 P-66.6.3 { smiles => 'C(C)=S', iupac => 'ethanethial' }, { smiles => 'C1(=CC=CC=C1)C=S', iupac => 'benzenecarbothialdehyde' }, { smiles => 'C(CCCCC)=[Se]', iupac => 'hexaneselenal' }, { smiles => 'C(CCCC=S)=S', iupac => 'pentanedithial' }, { smiles => 'C(=S)C1=CC=C(C(=O)O)C=C1', iupac => '4-(methanethioyl)benzoic acid' }, { smiles => 'C(=[Se])C1CCC(CC1)C(=O)O', iupac => '4-(methaneselenoyl)cyclohexane-1-carboxylic acid' }, { smiles => 'S=C1CCC(CC1)C=[Se]', iupac => '4-sulfanylidenecyclohexane-1-carboselenaldehyde' }, # From BBv2 P-66.6.4 { smiles => 'O=C(CC=O)C', iupac => '3-oxobutanal' }, { smiles => 'C=C(C=O)CCCC', iupac => '2-methylidenehexanal' }, { smiles => 'OC1=C(C=O)C=CC=C1', iupac => '2-hydroxybenzaldehyde' }, { smiles => 'OCC1=CC=C(O1)C=O', iupac => '5-(hydroxymethyl)furan-2-carbaldehyde' }, { smiles => 'O(C1=CC=CC=C1)CC=O', iupac => 'phenoxyacetaldehyde', AUTHOR => 1 }, { smiles => 'FC=1C(=C(C=O)C=CC1)C', iupac => '3-fluoro-2-methylbenzaldehyde' }, ); @cases = grep { !exists $_->{AUTHOR} } @cases unless $ENV{AUTHOR_TESTING}; plan skip_all => 'No available cases' unless @cases; plan tests => scalar @cases; for my $case (@cases) { my $ok; eval { $ok = is ChemOnomatopist::get_name( $case->{smiles} ), $case->{iupac}, $case->{smiles} }; $@ =~ s/\n$// if $@; fail $case->{smiles} . ": $@" if $@; diag 'test supposed to fail with AUTHOR_TESTING' if $case->{AUTHOR} && $ok; } ChemOnomatopist-0.10.0/t/16_heteroatoms.t000066400000000000000000000057111463750375500202140ustar00rootroot00000000000000#!/usr/bin/perl use strict; use warnings; use ChemOnomatopist; use Test::More; my @cases = ( { smiles => 'COCCOCCOCCOCC', iupac => '2,5,8,11-tetraoxatridecane' }, # BBv2 P-12.1 { smiles => 'C(F)(F)(F)C(F)(F)CO', iupac => '2,2,3,3,3-pentafluoropropan-1-ol' }, # BBv2 P-14.3.4.5 { smiles => 'ClC(C(Cl)(Cl)Cl)(Cl)Cl', iupac => 'hexachloroethane' }, { smiles => 'ClC(=C(Cl)Cl)Cl', iupac => 'tetrachloroethene' }, { smiles => 'C1CCCCC1CCOCC', iupac => '(2-ethoxyethyl)cyclohexane' }, { smiles => 'C1CCCCC1OCCCC', iupac => 'butoxycyclohexane' }, # PubChem 13299482 { smiles => '[SiH3][SiH2]OCCS[SiH2][SiH3]', iupac => '3-oxa-6-thia-1,2,7,8-tetrasilaoctane' }, # BBv2 P-15.3.4.1.1 # From BBv2 P-15.4.3.1 { smiles => 'COCSSCCOCC[Se]C', iupac => '2,8-dioxa-4,5-dithia-11-selenadodecane' }, { smiles => '[SiH3]OCS[SiH3]', iupac => '2-oxa-4-thia-1,5-disilapentane' }, # From BBv2 P-15.4.3.2.1 { smiles => 'C[SiH2]C[SiH2]C[SiH2]CSCC', iupac => '8-thia-2,4,6-trisiladecane' }, { smiles => 'C[SiH2]C[SiH2]C[SiH2]COC', iupac => '2-oxa-4,6,8-trisilanonane', AUTHOR => 1 }, # From BBv2 P-15.4.3.2.3 { smiles => 'C[SiH2]C[SiH2]C[SiH2]C[SiH2]C(=O)O', iupac => '2,4,6,8-tetrasilanonan-1-oic acid' }, { smiles => 'C[SiH2]C[SiH2]C[SiH2]C[SiH2]CCO', iupac => '2,4,6,8-tetrasiladecan-10-ol' }, { smiles => 'C[SiH2]C[SiH2]C[SiH2]C[SiH2]C=C', iupac => '2,4,6,8-tetrasiladec-9-ene' }, # BBv2 P-15.4.3.2.4 # From BBv2 P-21.2.3.2 { smiles => 'COCOCCOCCOC', iupac => '2,4,7,10-tetraoxaundecane' }, { smiles => 'CSC[SiH2]CCOCCOC', iupac => '7,10-dioxa-2-thia-4-silaundecane' }, { smiles => 'CO[SiH2]CC[SiH2]SC', iupac => '2-oxa-7-thia-3,6-disilaoctane' }, { smiles => 'BrC(CCCC(CCl)CBr)Cl', iupac => '1-bromo-5-(bromomethyl)-1,6-dichlorohexane' }, # BBv2 P-45.2.3 # From BBv3 P-61.3.1 { smiles => 'ClC(C)(C)C', iupac => '2-chloro-2-methylpropane' }, { smiles => 'ClC(C)CCCC', iupac => '2-chlorohexane' }, { smiles => 'BrC(Br)Br', iupac => 'tribromomethane' }, # From BBv3 P-61.3.4 # From BBv2 P-61.5.2 { smiles => 'ON(O)OC(C)(C)C(C(O)=O)NC(C)(C)C', iupac => '2-(tert-butylimino)-3-methyl-3-(nitrooxy)butanoic acid', AUTHOR => 1 }, { smiles => 'FC(C(CC)F)C(CC(CCCC)CC)CCCCCC', iupac => '7-(1,2-difluorobutyl)-5-ethyltridecane' }, # BBv2 P-14.5.2 # From BBv2 P-31.1.2.2.1 { smiles => '[SiH3][SiH]=[SiH][SiH2][SiH2][SiH3]', iupac => 'hexasil-2-ene', AUTHOR => 1 }, # FIXME: Need to elide vowels { smiles => '[SiH]#[SiH]', iupac => 'disilyne', AUTHOR => 1 }, ); @cases = grep { !exists $_->{AUTHOR} } @cases unless $ENV{AUTHOR_TESTING}; plan skip_all => 'No available cases' unless @cases; plan tests => scalar @cases; for my $case (@cases) { my $ok; eval { $ok = is ChemOnomatopist::get_name( $case->{smiles} ), $case->{iupac}, $case->{smiles} }; $@ =~ s/\n$// if $@; fail $case->{smiles} . ": $@" if $@; diag 'test supposed to fail with AUTHOR_TESTING' if $case->{AUTHOR} && $ok; } ChemOnomatopist-0.10.0/t/17_long.t000066400000000000000000000016511463750375500166210ustar00rootroot00000000000000#!/usr/bin/perl use strict; use warnings; use ChemOnomatopist; use Test::More; if( !$ENV{EXTENDED_TESTING} ) { plan skip_all => "Skip \$ENV{EXTENDED_TESTING} is not set\n"; } my %SMILES_cases = ( 'CCCCCC(CC(CC)CC)CC(CC(CC(CC)CC)CC(CC)CC)CC(CC(CC(CC)CC)CC(CC)CC)CC(CC(CC(CC(CC(CC)CC)CC(CC)CC)CC(CC(CC)CC)CC(CC)CC)CC(CC(CC(CC)CC)CC(CC)CC)CC(CC(CC)CC)CC(CC)CC)CC(CC(CC(CC(CC)CC)CC(CC)CC)CC(CC(CC)CC)CC(CC)CC)CC(CC(CC(CC)CC)CC(CC)CC)CC(CC(CC)CC)CC(CC)CC' => '3-ethyl-5,17-bis(2-ethylbutyl)-11-[8-ethyl-6-(2-ethylbutyl)-2-[6-ethyl-4-(2-ethylbutyl)-2-[4-ethyl-2-(2-ethylbutyl)hexyl]octyl]-4-[4-ethyl-2-(2-ethylbutyl)hexyl]decyl]-9-[6-ethyl-4-(2-ethylbutyl)-2-[4-ethyl-2-(2-ethylbutyl)hexyl]octyl]-7,13,15-tris[4-ethyl-2-(2-ethylbutyl)hexyl]docosane', ); plan tests => scalar( keys %SMILES_cases ); for my $case (sort keys %SMILES_cases) { my $name = $SMILES_cases{$case}; is ChemOnomatopist::get_name( $case ), $name; } ChemOnomatopist-0.10.0/t/18_hydroperoxides.t000066400000000000000000000021761463750375500207360ustar00rootroot00000000000000#!/usr/bin/perl use strict; use warnings; use ChemOnomatopist; use Test::More; my @cases = ( { smiles => 'CCOO', iupac => 'ethaneperoxol' }, # From BBv2 P-56.2 { smiles => 'CSO', iupac => 'methane-SO-thioperoxol' }, { smiles => 'C1(=CC=CC=C1)[Se][SeH]', iupac => 'benzenediselenoperoxol' }, # From BBv2 P-63.4.2.1 { smiles => 'CCCOS', iupac => 'propane-1-OS-thioperoxol' }, { smiles => 'CCSS', iupac => 'ethanedithioperoxol' }, { smiles => 'CS[Se][H]', iupac => 'methane-SSe-selenothioperoxol' }, # From BBv2 P-63.4.2.2 { smiles => 'OOCCO', iupac => '2-hydroperoxyethan-1-ol' }, { smiles => 'SSCC(=O)O', iupac => 'disulfanylacetic acid', AUTHOR => 1 }, ); @cases = grep { !exists $_->{AUTHOR} } @cases unless $ENV{AUTHOR_TESTING}; plan skip_all => 'No available cases' unless @cases; plan tests => scalar @cases; for my $case (@cases) { my $ok; eval { $ok = is ChemOnomatopist::get_name( $case->{smiles} ), $case->{iupac}, $case->{smiles} }; $@ =~ s/\n$// if $@; fail $case->{smiles} . ": $@" if $@; diag 'test supposed to fail with AUTHOR_TESTING' if $case->{AUTHOR} && $ok; } ChemOnomatopist-0.10.0/t/19_amines.t000066400000000000000000000100211463750375500171270ustar00rootroot00000000000000#!/usr/bin/perl use strict; use warnings; use ChemOnomatopist; use Test::More; my @cases = ( { smiles => 'CC(CN)O', iupac => '1-aminopropan-2-ol' }, { smiles => 'C(CC(=O)N)C(=O)C(=O)O', iupac => '5-amino-2,5-dioxopentanoic acid' }, # PubChem 48 { smiles => 'CC(CC(C(=O)O)N)N', iupac => '2,4-diaminopentanoic acid' }, { smiles => 'C(CN)C=O', iupac => '3-aminopropanal' }, { smiles => 'C(CCN)CCN', iupac => 'pentane-1,5-diamine' }, { smiles => 'CC(CC1=CC=C(NCCC(C)C)C=C1)CC', iupac => '4-(2-methylbutyl)-N-(3-methylbutyl)aniline' }, # BBv2 P-14.5.4 { smiles => 'NC1=CC=CC=C1', iupac => 'aniline' }, # BBv2 P-34.1.1.5 { smiles => 'COC1=CC=C(NC2=CC=CC=C2)C=C1', iupac => '4-methoxy-N-phenylaniline' }, # BBv2 P-45.2.1 { smiles => 'NC1=C(OC2=C(NC)C=CC=C2)C=CC(=C1)C', iupac => '2-(2-amino-4-methylphenoxy)-N-methylaniline', AUTHOR => 1 }, # BBv2 P-45.2.2 { smiles => 'BrC1=C(NC2=C(C=C(C=C2)Br)Cl)C=CC(=C1)Cl', iupac => '2-bromo-N-(4-bromo-2-chlorophenyl)-4-chloroaniline' }, # BBv2 P-45.2.3 { smiles => 'BrC1=C(NC2=C(C=C(C=C2)Br)Br)C=CC(=C1)Cl', iupac => '2-bromo-4-chloro-N-(2,4-dibromophenyl)aniline', AUTHOR => 1 }, # BBv2 P-45.5 { smiles => 'CNC', iupac => 'N-methylmethanamine' }, # BBv2 P-52.1.3 # From BBv2 P-62.2.1.1.1 { smiles => 'CNC1=CC=CC=C1', iupac => 'N-methylaniline' }, { smiles => 'ClC1=CC=C(N)C=C1', iupac => '4-chloroaniline' }, { smiles => 'CC1=CC=C(N)C=C1', iupac => '4-methylaniline' }, # BBv2 P-62.2.1.1.2 # From BBv3 P-62.2.1.2 { smiles => 'CN', iupac => 'methanamine' }, { smiles => 'CC(CN)C', iupac => '2-methylpropan-1-amine' }, { smiles => 'S1CC(CCCCCCCCCC1)N', iupac => '1-thiacyclotridecan-3-amine' }, { smiles => 'CC1C(CCCC1)N', iupac => '2-methylcyclohexan-1-amine' }, { smiles => 'ClCCN', iupac => '2-chloroethan-1-amine', AUTHOR => 1 }, # From BBv2 P-62.2.2.1 { smiles => 'C(C)N(CC)CC', iupac => 'N,N-diethylethanamine' }, { smiles => 'ClCCNCCC', iupac => 'N-(2-chloroethyl)propan-1-amine' }, { smiles => 'C(C)N(CCCC)CCC', iupac => 'N-ethyl-N-propylbutan-1-amine' }, { smiles => 'C1(=CC=CC=C1)NC=1C=NC=CC1', iupac => 'N-phenylpyridin-3-amine' }, { smiles => 'C1(=CC=CC=C1)NC1=CC=CC=C1', iupac => 'N-phenylaniline' }, { smiles => 'C1(CCCCC1)NC1=CC=CC=C1', iupac => 'N-cyclohexylaniline' }, # From BBv2 P-62.2.2.2 { smiles => 'CC(C#CCN(CCC)CCC)=C', iupac => '4-methyl-N,N-dipropylpent-4-en-2-yn-1-amine' }, { smiles => 'CN(C(C)C=CC1CC=C(CC1)C)C', iupac => 'N,N-dimethyl-4-(4-methylcyclohex-3-en-1-yl)but-3-en-2-amine', AUTHOR => 1 }, { smiles => 'CN(C(C#C)CC)C', iupac => 'N,N-dimethylpent-1-yn-3-amine', AUTHOR => 1 }, { smiles => 'C(=C)NCCCC', iupac => 'N-ethenylbutan-1-amine' }, { smiles => 'CC(CN(CC(=C)C)CC(=C)C)(C)C', iupac => 'N-(2,2-dimethylpropyl)-2-methyl-N-(2-methylprop-2-en-1-yl)prop-2-en-1-amine' }, { smiles => 'C1(CCCCC1)NC1=CC=CC=C1', iupac => 'N-cyclohexylaniline' }, { smiles => 'O1C(=CC=C1)NC=1NC=CC1', iupac => 'N-(furan-2-yl)-1H-pyrrol-2-amine' }, { smiles => 'C(CCC)NC1CC1', iupac => 'N-butylcyclopropanamine' }, { smiles => 'C1=C(C=CC=2CCCCC12)NC1=CC2=CC=CC=C2C=C1', iupac => 'N-(5,6,7,8-tetrahydronaphthalen-2-yl)naphthalen-2-amine', AUTHOR => 1 }, { smiles => 'NCCC(=O)O', iupac => '3-aminopropanoic acid' }, # From BBv2 P-62.2.3 { smiles => 'CC(C)C(C)N1CCCCC(C1=O)NC', iupac => '3-(methylamino)-1-(3-methylbutan-2-yl)azepan-2-one' }, # PubChem 58916315 { smiles => 'C1CNCCC1CNCCO', iupac => '2-(piperidin-4-ylmethylamino)ethanol', AUTHOR => 1 }, # PubChem 14950460 # CHECKME: Most likely incorrect IUPAC name, should be -ethan-1-ol ); @cases = grep { !exists $_->{AUTHOR} } @cases unless $ENV{AUTHOR_TESTING}; plan skip_all => 'No available cases' unless @cases; plan tests => scalar @cases; for my $case (@cases) { my $ok; eval { $ok = is ChemOnomatopist::get_name( $case->{smiles} ), $case->{iupac}, $case->{smiles} }; $@ =~ s/\n$// if $@; fail $case->{smiles} . ": $@" if $@; diag 'test supposed to fail with AUTHOR_TESTING' if $case->{AUTHOR} && $ok; } ChemOnomatopist-0.10.0/t/20_imines.t000066400000000000000000000040031463750375500171320ustar00rootroot00000000000000#!/usr/bin/perl use strict; use warnings; use ChemOnomatopist; use Test::More; my @cases = ( # From BBv2 P-62.3.1.1 { smiles => 'C(CCCCC)=N', iupac => 'hexan-1-imine' }, { smiles => 'CN=CC', iupac => 'N-methylethanimine' }, { smiles => 'ClC1=CC=C(C=C1)N=CC1=CC=C(C=C1)Cl', iupac => 'N,1-bis(4-chlorophenyl)methanimine' }, { smiles => 'ClC1=CC=C(C=C1)C=NC1=CC=C(N)C=C1', iupac => '4-{[(4-chlorophenyl)methylidene]amino}aniline', AUTHOR => 1 }, { smiles => 'S1C(CCC1)=N', iupac => 'thiolan-2-imine' }, { smiles => 'C1C(C=CC2=CC=CC=C12)=N', iupac => 'naphthalen-2(1H)-imine', AUTHOR => 1 }, # From BBv2 P-62.3.1.2 { smiles => 'N=C1CC(CC(C1)=N)=O', iupac => '3,5-diiminocyclohexan-1-one' }, { smiles => 'N=C(CC1CC(CCC1)C(=O)O)C', iupac => '3-(2-iminopropyl)cyclohexane-1-carboxylic acid' }, { smiles => 'N=C1CCC(N1)=O', iupac => '5-iminopyrrolidin-2-one' }, { smiles => 'N=C1C=CC(C=C1)=O', iupac => '4-iminocyclohexa-2,5-dien-1-one' }, # From BBv2 P-62.3.1.3 { smiles => 'CP=N', iupac => '1-methylphosphanimine', AUTHOR => 1 }, { smiles => 'C[Si](=NC1=CC=CC=C1)C', iupac => '1,1-dimethyl-N-phenylsilanimine', AUTHOR => 1 }, { smiles => 'CN=[SiH]CC(=O)OC', iupac => 'methyl [(methylimino)silyl]acetate', AUTHOR => 1 }, { smiles => 'C(C)S(=NC1=CC=CC=C1)CC', iupac => 'S,S-diethyl-N-phenyl-λ4-sulfanimine' }, # From BBv3 P-68.4.3.3 { smiles => 'C(C(=N)C(=O)O)C(=O)O', iupac => '2-iminobutanedioic acid' }, { smiles => 'CC(C)C=N', iupac => '2-methylpropan-1-imine' }, { smiles => 'CCCN=CC', iupac => 'N-propylethanimine' }, ); @cases = grep { !exists $_->{AUTHOR} } @cases unless $ENV{AUTHOR_TESTING}; plan skip_all => 'No available cases' unless @cases; plan tests => scalar @cases; for my $case (@cases) { my $ok; eval { $ok = is ChemOnomatopist::get_name( $case->{smiles} ), $case->{iupac}, $case->{smiles} }; $@ =~ s/\n$// if $@; fail $case->{smiles} . ": $@" if $@; diag 'test supposed to fail with AUTHOR_TESTING' if $case->{AUTHOR} && $ok; } ChemOnomatopist-0.10.0/t/21_monocycles.t000066400000000000000000000236441463750375500200360ustar00rootroot00000000000000#!/usr/bin/perl use strict; use warnings; use ChemOnomatopist; use Test::More; my @cases = ( { smiles => 'C1=CC=CC=C1', iupac => 'benzene' }, { smiles => 'C=1C=CC=CC=1', iupac => 'benzene' }, { smiles => 'c1ccccc1', iupac => 'benzene' }, { smiles => 'FC1=CC=CC=C1', iupac => 'fluorobenzene' }, { smiles => 'FC=1C(=C(C(=C(C1)F)F)F)F', iupac => 'pentafluorobenzene' }, { smiles => 'FC1=C(C(=C(C(=C1F)F)F)F)F', iupac => 'hexafluorobenzene' }, { smiles => 'C1(C=CCCC1)O', iupac => 'cyclohex-2-en-1-ol' }, # From BBv2 P-14.3.2 { smiles => 'N1=C(C=NC=C1)C(=O)O', iupac => 'pyrazinecarboxylic acid', AUTHOR => 1 }, # From BBv2 P-14.3.4.3 { smiles => 'C1(C=CCCC1)N', iupac => 'cyclohex-2-en-1-amine' }, # From BBv2 P-14.4 { smiles => 'C1(CCCC1)=CC(=O)O', iupac => 'cyclopentylideneacetic acid' }, # From BBv2 P-15.6.2 # From BBv2 P-22.1.3 { smiles => 'CC1=CC=CC=C1', iupac => 'toluene' }, { smiles => 'C=1(C(=CC=CC1)C)C', iupac => '1,2-xylene' }, { smiles => 'C1(=CC(=CC=C1)C)C', iupac => '1,3-xylene' }, { smiles => 'C1(=CC=C(C=C1)C)C', iupac => '1,4-xylene' }, { smiles => 'CC1=CC(=CC(=C1)C)C', iupac => '1,3,5-trimethylbenzene' }, { smiles => 'N(=O)C1=CC=CC=C1', iupac => 'nitrosobenzene' }, # From BBv2 P-59.1.9 { smiles => 'Br(=O)C1=CC=CC=C1', iupac => 'bromosylbenzene' }, # From BBv2 P-67.1.4.5 # From BBv3 P-22.2.1 { smiles => 'O1C=CC=C1', iupac => 'furan' }, { smiles => 'N1C=NC=C1', iupac => '1H-imidazole' }, { smiles => 'O1C=NC=C1', iupac => '1,3-oxazole' }, { smiles => 'S1C=NC=C1', iupac => '1,3-thiazole' }, { smiles => '[Se]1C=NC=C1', iupac => '1,3-selenazole' }, { smiles => '[Te]1C=NC=C1', iupac => '1,3-tellurazole' }, { smiles => 'O1N=CC=C1', iupac => '1,2-oxazole' }, { smiles => 'S1N=CC=C1', iupac => '1,2-thiazole' }, { smiles => '[Se]1N=CC=C1', iupac => '1,2-selenazole' }, { smiles => '[Te]1N=CC=C1', iupac => '1,2-tellurazole' }, { smiles => 'O1CC=CC=C1', iupac => '2H-pyran' }, { smiles => 'S1CC=CC=C1', iupac => '2H-thiopyran' }, { smiles => '[Se]1CC=CC=C1', iupac => '2H-selenopyran' }, { smiles => '[Te]1CC=CC=C1', iupac => '2H-telluropyran' }, { smiles => 'N1=CC=NC=C1', iupac => 'pyrazine' }, { smiles => 'N1N=CC=C1', iupac => '1H-pyrazole' }, { smiles => 'N1=NC=CC=C1', iupac => 'pyridazine' }, { smiles => 'N1=CC=CC=C1', iupac => 'pyridine' }, { smiles => 'N1=CN=CC=C1', iupac => 'pyrimidine' }, { smiles => 'N1C=CC=C1', iupac => '1H-pyrrole' }, { smiles => '[Se]1C=CC=C1', iupac => 'selenophene' }, { smiles => '[Te]1C=CC=C1', iupac => 'tellurophene' }, { smiles => 'S1C=CC=C1', iupac => 'thiophene' }, { smiles => 'O1CNCC1', iupac => '1,3-oxazolidine' }, { smiles => 'O1NCCC1', iupac => '1,2-oxazolidine' }, { smiles => 'S1CNCC1', iupac => '1,3-thiazolidine' }, { smiles => 'S1NCCC1', iupac => '1,2-thiazolidine' }, { smiles => '[Se]1CNCC1', iupac => '1,3-selenazolidine' }, { smiles => '[Se]1NCCC1', iupac => '1,2-selenazolidine' }, { smiles => '[Te]1CNCC1', iupac => '1,3-tellurazolidine' }, { smiles => '[Te]1NCCC1', iupac => '1,2-tellurazolidine' }, { smiles => 'N1CCCC1', iupac => 'pyrrolidine' }, { smiles => 'N1CCOCC1', iupac => 'morpholine' }, { smiles => 'N1CCSCC1', iupac => 'thiomorpholine' }, { smiles => 'N1CC[Se]CC1', iupac => 'selenomorpholine' }, { smiles => 'N1CC[Te]CC1', iupac => 'telluromorpholine' }, { smiles => 'N1NCCC1', iupac => 'pyrazolidine' }, { smiles => 'N1CNCC1', iupac => 'imidazolidine' }, { smiles => 'N1CCCCC1', iupac => 'piperidine' }, { smiles => 'N1CCNCC1', iupac => 'piperazine' }, { smiles => 'S1CCCCCCCCCCC1', iupac => 'thiacyclododecane' }, # From BBv3 P-22.2.3.1 # From BBv2 P-61.5.1 { smiles => 'CC1=C(C=C(C=C1[N+](=O)[O-])[N+](=O)[O-])[N+](=O)[O-]', iupac => '2-methyl-1,3,5-trinitrobenzene' }, # From BBv2 P-22.2.2.1.1 { smiles => 'S1C=CC=CC=C1', iupac => 'thiepine' }, { smiles => 'O1CCCCCCC1', iupac => 'oxocane' }, # From BBv2 P-22.2.2.1.5.2 { smiles => 'O1CCC1', iupac => 'oxetane' }, { smiles => 'N1CCC1', iupac => 'azetidine' }, { smiles => 'C1NNN1', iupac => 'triazetidine' }, # PubChem 23033474 # From BBv2 P-22.2.2.1.2 { smiles => 'N1=CC=CN=CC=C1', iupac => '1,5-diazocine' }, { smiles => 'O1COCC1', iupac => '1,3-dioxolane' }, # From BBv2 P-22.2.2.1.3 { smiles => 'S1C=NC=C1', iupac => '1,3-thiazole' }, { smiles => 'O1SCCC1', iupac => '1,2-oxathiolane' }, { smiles => 'O1SCCCSC1', iupac => '1,2,6-oxadithiepane' }, { smiles => 'O1N=CC=P1', iupac => '1,2,5-oxazaphosphole' }, { smiles => 'O1CCCCCCOCCCCCCCCCC1', iupac => '1,8-dioxacyclooctadecane' }, # From BBv2 P-22.2.4 # From BBv2 P-25.2.2.1.2 { smiles => 'O1C=CC=CC=COC=CC=CC=CC=CC=C1', iupac => '1,8-dioxacyclooctadeca-2,4,6,9,11,13,15,17-octaene' }, { smiles => 'O1CC=NC=CC=NC=CN=CC=C1', iupac => '1-oxa-4,8,11-triazacyclotetradeca-3,5,7,9,11,13-hexaene' }, # From BBv2 P-63.1.1.1 { smiles => 'C1=CC=CC=C1O', iupac => 'phenol' }, { smiles => 'BrC1=C(C=CC=C1)O', iupac => '2-bromophenol' }, { smiles => 'C(=O)(O)C1=CC=CC=C1', iupac => 'benzoic acid' }, { smiles => 'N1C(CCCCC1)=S', iupac => 'azepane-2-thione' }, # From BBv2 P-64.6.1 # From BBv2 P-31.1.3.1 { smiles => 'C1C=CCCC1', iupac => 'cyclohexene' }, { smiles => 'C1C=CCC=C1', iupac => 'cyclohexa-1,4-diene' }, # From BBv2 P-31.1.3.2 { smiles => 'O1C=COCCOCCOCC1', iupac => '1,4,7,10-tetraoxacyclododec-2-ene' }, { smiles => 'O1CC=NCCCCCCCC1', iupac => '1-oxa-4-azacyclododec-3-ene' }, { smiles => '[SiH2]1CC#CC=CC=CCC[SiH2]CCCCCCCCC1', iupac => '1,11-disilacycloicosa-5,7-dien-3-yne' }, { smiles => '[SiH2]1CCCCCCCC[SiH2]CC=CC=CC=CC#CC1', iupac => '1,10-disilacycloicosa-12,14,16-trien-18-yne' }, # From BBv2 P-31.1.3.3 { smiles => 'C=1=C=C=C=C=C=C=C=C=C=C1', iupac => 'cycloundecaundecaene' }, # From BBv2 P-31.1.3.4 { smiles => 'C=CC1=CC=CC=C1', iupac => 'ethenylbenzene' }, { smiles => 'C=C1C=CC=C1', iupac => '5-methylidenecyclopenta-1,3-diene' }, { smiles => 'S1CCNCCC1', iupac => '1,4-thiazepane' }, # From BBv2 P-31.2.3.2 # From BBv2 P-14.5.1 { smiles => 'C1CCCCC1(C)CC', iupac => '1-ethyl-1-methylcyclohexane' }, { smiles => 'CCC1CCC(C)CC1', iupac => '1-ethyl-4-methylcyclohexane' }, { smiles => 'CC1=NC(=CC=C1)C', iupac => '2,6-dimethylpyridine' }, { smiles => 'C1CCCCC1(C(C)(C)C)(CCCC)', iupac => '1-butyl-1-tert-butylcyclohexane' }, # Simplified version of example from BBv2 P-14.5.1 { smiles => 'C(=O)(O)CC([Br])([Br])C1CCCCC1', iupac => '3,3-dibromo-3-cyclohexylpropanoic acid' }, { smiles => 'ClC=1C=CC=CC=1C(F)(F)C(F)(F)F', iupac => '1-chloro-2-(pentafluoroethyl)benzene' }, # From BBv2 P-14.3.4.5 { smiles => 'CC(CCC)C1=CC=C(C=C1)C(CC)CC', iupac => '1-(pentan-2-yl)-4-(pentan-3-yl)benzene' }, # From BBv2 P-14.5.4 # From BBv2 P-14.5.3 { smiles => 'C(C)(C)(C)C=1C=CC=C(C(C)CC)C=1', iupac => '1-(butan-2-yl)-3-tert-butylbenzene' }, # From BBv3 P-44.4.1.5 { smiles => 'O1CS[SnH2]SCOCCCC1', iupac => '1,7-dioxa-3,5-dithia-4-stannacycloundecane' }, { smiles => 'O1CCS[SnH2]SCCOCC1', iupac => '1,9-dioxa-4,6-dithia-5-stannacycloundecane' }, { smiles => 'O=C1NC(=O)NC=C1C', iupac => '5-methylpyrimidine-2,4(1H,3H)-dione', AUTHOR => 1 }, # thymine { smiles => 'c1cc(oc1)C=O', iupac => 'furan-2-carbaldehyde', AUTHOR => 1 }, # furfural { smiles => 'O(c1cc(cc(OC)c1OC)CCN)C', iupac => '2-(3,4,5-trimethoxyphenyl)ethanamine' }, # mescaline, SMILES from Wikipedia { smiles => 'COC=1C=C(C=C(C1OC)OC)CCN', iupac => '2-(3,4,5-trimethoxyphenyl)ethanamine' }, # mescaline { smiles => 'C1=CC=CC=CC=CC=CC#C1', iupac => 'cyclododeca-1,3,5,7,9-pentaen-11-yne' }, # From BBv3 P-54.2 { smiles => 'C1CCCCC1C=O', iupac => 'cyclohexanecarbaldehyde' }, # From BBv2 P-66.6.1.1.3 { smiles => 'S=C(CC1CC(CCC1)CC(CC)=O)CC', iupac => '1-[3-(2-sulfanylidenebutyl)cyclohexyl]butan-2-one' }, # From BBv2 P-64.7.3 { smiles => 'C1=CC(=CC=C1F)Cl(=O)(=O)=O', iupac => '1-fluoro-4-perchlorylbenzene' }, # PubChem 24972904 { smiles => 'CCC1CN1Cl(=O)(=O)=O', iupac => '2-ethyl-1-perchlorylaziridine' }, # PubChem 24973518 { smiles => 'CC(=CC(=C(C)C)C(=C)C1=CC=CC=C1)C', iupac => '(5-methyl-3-propan-2-ylidenehexa-1,4-dien-2-yl)benzene' }, # PubChem 141889885 { smiles => 'Cc1c(I)c(C)c(c(c1C)I)C', iupac => '1,4-diiodo-2,3,5,6-tetramethylbenzene' }, # COD 2013412 # From BBv2 P-59.2.1.6 { smiles => 'OC1CCC(CC1)C(CCCCCO)O', iupac => '1-(4-hydroxycyclohexyl)hexane-1,6-diol', AUTHOR => 1 }, { smiles => 'O=C(CC1CC(C(C1)=O)=O)CC', iupac => '4-(2-oxobutyl)cyclopentane-1,2-dione' }, { smiles => 'O=CCCCCCCC1CC(CCC1)C=O', iupac => '3-(7-oxoheptyl)cyclohexane-1-carbaldehyde', AUTHOR => 1 }, # From BBv2 P-59.2.1.7 { smiles => 'S1C(=NC=C1)CC(=O)O', iupac => '(1,3-thiazol-2-yl)acetic acid' }, # From BBv2 P-15.6.1.2 { smiles => 'S1C(=NC=C1)CCCCC(=O)O', iupac => '5-(1,3-thiazol-2-yl)pentanoic acid' }, # Synthetic, based on BBv2 P-15.6.1.2 { smiles => 'CC1=C(C=C(C=C1)C(C)C)O', iupac => '2-methyl-5-(propan-2-yl)phenol' }, # From BBv2 P-63.1.1.2 { smiles => 'S=C1OCCC1C#N', iupac => '2-sulfanylideneoxolane-3-carbonitrile' }, # From BBv2 P-65.6.3.5.1 # From BBv2 P-64.7.1 { smiles => 'NC1C(NCCCC1)=O', iupac => '3-aminoazepan-2-one' }, { smiles => 'ClC=1C(C(=C(C(C1O)=O)Cl)O)=O', iupac => '2,5-dichloro-3,6-dihydroxycyclohexa-2,5-diene-1,4-dione' }, ); @cases = grep { !exists $_->{AUTHOR} } @cases unless $ENV{AUTHOR_TESTING}; plan skip_all => 'No available cases' unless @cases; plan tests => scalar @cases; for my $case (@cases) { my $ok; eval { $ok = is ChemOnomatopist::get_name( $case->{smiles} ), $case->{iupac}, $case->{smiles} }; $@ =~ s/\n$// if $@; fail $case->{smiles} . ": $@" if $@; if( $case->{AUTHOR} && $ok ) { diag 'test supposed to fail with AUTHOR_TESTING' . ( $case->{AUTHOR} !~ /^1$/ ? ': ' . $case->{AUTHOR} : '' ); } } ChemOnomatopist-0.10.0/t/22_comparisons.t000066400000000000000000000023431463750375500202120ustar00rootroot00000000000000#!/usr/bin/perl use strict; use warnings; use ChemOnomatopist; use ChemOnomatopist::Old; use Test::More; plan tests => 7; my @sorted; @sorted = sort { ChemOnomatopist::Old::cmp_attachments( $a, $b ) } ( [ 'butyl' ], [ 'tert-butyl' ] ); is join( ';', map { join ',', @$_ } @sorted ), 'butyl;tert-butyl'; @sorted = sort { ChemOnomatopist::Old::cmp_attachments( $a, $b ) } ( [ 'tricosyl' ], [ 'tert-butyl' ] ); is join( ';', map { join ',', @$_ } @sorted ), 'tricosyl;tert-butyl'; @sorted = sort { ChemOnomatopist::Old::cmp_attachments( $a, $b ) } ( [ 'ethyl' ], [ 'methyl' ] ); is join( ';', map { join ',', @$_ } @sorted ), 'ethyl;methyl'; @sorted = sort { ChemOnomatopist::cmp_heteroatom_seniority( $a, $b ) } ( [ 'N' ], [ 'S' ] ); is join( ';', map { join ',', @$_ } @sorted ), 'S;N'; @sorted = sort { ChemOnomatopist::cmp_heteroatom_seniority( $a, $b ) } ( [ 'O' ], [ 'Si' ] ); is join( ';', map { join ',', @$_ } @sorted ), 'O;Si'; @sorted = sort { ChemOnomatopist::cmp_heteroatom_seniority( $a, $b ) } ( [ 'N' ], [ 'O' ] ); is join( ';', map { join ',', @$_ } @sorted ), 'O;N'; @sorted = sort { ChemOnomatopist::cmp_heteroatom_seniority( $a, $b ) } ( [ 'N' ], [ 'O' ], [ 'P' ] ); is join( ';', map { join ',', @$_ } @sorted ), 'O;N;P'; ChemOnomatopist-0.10.0/t/23_esters.t000066400000000000000000000050701463750375500171630ustar00rootroot00000000000000#!/usr/bin/perl use strict; use warnings; use ChemOnomatopist; use Test::More; my @cases = ( # From BBv3 P-65.6.3.2.1 { smiles => 'C(C)(=O)OCC', iupac => 'ethyl acetate', AUTHOR => 1 }, { smiles => 'C(CCC(=O)OC)(=O)OCC', iupac => 'ethyl methyl butanedioate', AUTHOR => 1 }, { smiles => 'C1(CCCCC1)C(=O)OC', iupac => 'methyl cyclohexanecarboxylate', AUTHOR => 1 }, { smiles => 'C(C)C1=CC=C(C=C1)S(=O)(=O)OC', iupac => 'methyl 4-ethylbenzene-1-sulfonate', AUTHOR => 1 }, # From BBv3 P-65.6.3.2.2 { smiles => 'C(C)(=O)OCC(COC(C)=O)OC(C)=O', iupac => 'propane-1,2,3-triyl triacetate', AUTHOR => 1 }, { smiles => 'C(CC(=O)OC1=CC=C(C=C1)OC(CC(=O)OC)=O)(=O)OC', iupac => 'dimethyl 1,4-phenylene dipropanedioate', AUTHOR => 1 }, { smiles => 'C(CC(=O)OC1=CC=C(C=C1)OC(CC(=O)OC)=O)(=O)OCC', iupac => 'ethyl methyl 1,4-phenylene dipropanedioate', AUTHOR => 1 }, # From BBv3 P-65.6.3.2.3 { smiles => '[Br-].C(C)OC(CC[N+](C)(C)C)=O', iupac => '3-ethoxy-N,N,N-trimethyl-3-oxopropan-1-aminium bromide', AUTHOR => 1 }, { smiles => 'C(C1=CC=CC=C1)(=O)OCCC(=O)O', iupac => '3-(benzoyloxy)propanoic acid', AUTHOR => 1 }, { smiles => 'C(C)(=O)OCCS(=O)(=O)O', iupac => '2-(acetyloxy)ethane-1-sulfonic acid', AUTHOR => 1 }, { smiles => 'O(C1=CC=CC=C1)S(=S)C1=CC=C(C2=CC=CC=C12)C(=O)OC', iupac => 'methyl 4-(phenoxysulfinothioyl)naphthalene-1-carboxylate', AUTHOR => 1 }, { smiles => 'C1(=CC=CC=C1)SS(=O)(=O)C1=CC=C(C2=CC=CC=C12)C(=O)OC', iupac => 'methyl 4-[(phenylsulfanyl)sulfonyl]naphthalene-1-carboxylate', AUTHOR => 1 }, { smiles => 'C(C)OC(=O)OC(C(=O)OCC)C(C(C)(C)C)=O', iupac => 'ethyl 2-[(ethoxycarbonyl)oxy]-4,4-dimethyl-3-oxopentanoate', AUTHOR => 1 }, { smiles => 'N1=CC(=CC=C1)C(=O)OCCC(=O)O', iupac => '3-[(pyridine-3-carbonyl)oxy]propanoic acid', AUTHOR => 1 }, { smiles => 'N1=C(C=CC2=CC=CC=C12)C(=O)OCC(=O)O', iupac => '[(quinoline-2-carbonyl)oxy]acetic acid', AUTHOR => 1 }, { smiles => 'CCCCCCCC(=O)OC(C)(C)C', iupac => 'tert-butyl octanoate', AUTHOR => 1 }, # BBv2 P-65.6.3.3.1 { smiles => 'CCOC(=O)CC(=O)OC', iupac => 'ethyl methyl propanedioate', AUTHOR => 1 }, # BBv2 P-65.6.3.3.2.1 ); @cases = grep { !exists $_->{AUTHOR} } @cases unless $ENV{AUTHOR_TESTING}; plan skip_all => 'No available cases' unless @cases; plan tests => scalar @cases; for my $case (@cases) { my $ok; eval { $ok = is ChemOnomatopist::get_name( $case->{smiles} ), $case->{iupac}, $case->{smiles} }; $@ =~ s/\n$// if $@; fail $case->{smiles} . ": $@" if $@; diag 'test supposed to fail with AUTHOR_TESTING' if $case->{AUTHOR} && $ok; } ChemOnomatopist-0.10.0/t/24_monocycle_names.t000066400000000000000000000013201463750375500210240ustar00rootroot00000000000000#!/usr/bin/perl use strict; use warnings; use Chemistry::OpenSMILES::Parser; use ChemOnomatopist; use ChemOnomatopist::Chain::Monocycle; use Graph::Traversal::DFS; use Test::More; my @cases = sort keys %ChemOnomatopist::Chain::Monocycle::names; plan tests => scalar @cases; for my $SMILES (@cases) { my $ring = $SMILES; $ring =~ s/^(\[?[A-Za-z][a-z]?\]?)/${1}1/; $ring .= 1; my $parser = Chemistry::OpenSMILES::Parser->new; my( $graph ) = $parser->parse( $ring ); $graph->delete_vertices( grep { $_->{symbol} eq 'H' } $graph->vertices ); my $cycle = ChemOnomatopist::Chain::Circular->new( $graph, Graph::Traversal::DFS->new( $graph )->dfs ); is $cycle->backbone_SMILES, $SMILES; } ChemOnomatopist-0.10.0/t/25_alkene_alkyne.t000066400000000000000000000037551463750375500204720ustar00rootroot00000000000000#!/usr/bin/perl use strict; use warnings; use ChemOnomatopist; use Test::More; my @cases = ( # From BBv2 P-15.1.7.2.3 { smiles => 'C=CCCO', iupac => 'but-3-en-1-ol' }, { smiles => 'C=CC(C)C', iupac => '3-methylbut-1-ene' }, { smiles => 'CC=CC(O)CC(C)C', iupac => '6-methylhept-2-en-4-ol' }, # From BBv2 P-31.1.1.2 { smiles => 'C=CC=C', iupac => 'buta-1,3-diene' }, { smiles => 'C=CC=CC=CC=CC', iupac => 'nona-1,3,5,7-tetraene' }, { smiles => 'COCCOCCOCCOCC=C', iupac => '2,5,8,11-tetraoxatetradec-13-ene' }, # BBv2 P-31.1.2.2.2 # From BBv2 P-44.4.1.1 { smiles => 'CC=CC#C', iupac => 'pent-3-en-1-yne' }, { smiles => 'CCC=CC', iupac => 'pent-2-ene' }, { smiles => 'CCOCCOCCOCCOC=C', iupac => '3,6,9,12-tetraoxatetradec-1-ene' }, # From BBv2 P-44.4.1.2 { smiles => 'C1C=CCCCCCCCCCCCCCCCCC1', iupac => 'cycloicosene' }, { smiles => 'C1C#CCCCCCCCCCCCCCCCCC1', iupac => 'cycloicosyne' }, { smiles => 'C1=CCCCCCC=CCCCCCCCCCCC1', iupac => 'cycloicosa-1,8-diene' }, { smiles => 'CCC(=C)CCC', iupac => '3-methylidenehexane' }, # BBv2 P-61.2.1 { smiles => 'C(C)=C(C(C)C1=NC=CC=C1)CCC=C(C)C', iupac => '2-(3-ethylidene-7-methyloct-6-en-2-yl)pyridine' }, # BBv2 P-61.2.4 { smiles => 'CCC=CCC', iupac => 'hex-3-ene' }, # Chain halves are joined by double bond { smiles => 'CC#CCC(=O)CNCC#C', iupac => '1-(prop-2-ynylamino)hex-4-yn-2-one' }, # PubChem 116589377 { smiles => 'C=CC([Br])C(CCC)CC(CC)=C(F)C', iupac => '3-bromo-6-ethyl-7-fluoro-4-propylocta-1,6-diene' }, # Taken from school's assignment ); @cases = grep { !exists $_->{AUTHOR} } @cases unless $ENV{AUTHOR_TESTING}; plan skip_all => 'No available cases' unless @cases; plan tests => scalar @cases; for my $case (@cases) { my $ok; eval { $ok = is ChemOnomatopist::get_name( $case->{smiles} ), $case->{iupac}, $case->{smiles} }; $@ =~ s/\n$// if $@; fail $case->{smiles} . ": $@" if $@; diag 'test supposed to fail with AUTHOR_TESTING' if $case->{AUTHOR} && $ok; } ChemOnomatopist-0.10.0/t/26_monospiro.t000066400000000000000000000046521463750375500177130ustar00rootroot00000000000000#!/usr/bin/perl use strict; use warnings; use ChemOnomatopist; use Test::More; my @cases = ( # From BBv2 P-24.2.1 { smiles => 'C1CCCC12CCCCC2', iupac => 'spiro[4.5]decane' }, { smiles => 'C1CCCC12CCCC2', iupac => 'spiro[4.4]nonane' }, # From BBv2 P-24.2.4.1.1 { smiles => 'C1CCCC12OCCCC2', iupac => '6-oxaspiro[4.5]decane' }, { smiles => 'C1CCCC12NCCOC2', iupac => '9-oxa-6-azaspiro[4.5]decane' }, { smiles => 'C1CCCC12CSCNC2', iupac => '7-thia-9-azaspiro[4.5]decane' }, # From BBv2 P-31.1.5.1.1 { smiles => 'C1=CCCCC12CC=CCC2', iupac => 'spiro[5.5]undeca-1,8-diene' }, { smiles => 'C1CCCC12C=CCCC2', iupac => 'spiro[4.5]dec-6-ene' }, # From BBv3 P-44.4.1.5 { smiles => 'O1CCOP12OCCCO2', iupac => '1,4,6,10-tetraoxa-5λ5-phosphaspiro[4.5]decane' }, { smiles => 'C1OOCP12OCCCO2', iupac => '2,3,6,10-tetraoxa-5λ5-phosphaspiro[4.5]decane' }, { smiles => 'C1C=CCC12CCCCC2', iupac => 'spiro[4.5]dec-2-ene' }, # From BBv2 P-93.5.3.3 { smiles => 'C1CC2(C1)C(C2(C#N)C#N)(C#N)C#N', iupac => 'spiro[2.3]hexane-1,1,2,2-tetracarbonitrile' }, # PubChem 263661 { smiles => 'CC(C)(C)C1CCC2(C(C1)(CCCO)O)OCCO2', iupac => '8-tert-butyl-6-(3-hydroxypropyl)-1,4-dioxaspiro[4.5]decan-6-ol', AUTHOR => 1 }, # PubChem 496485 { smiles => 'CCCN1C(=O)C(NC(=O)C12CCNCC2)CC(C)C', iupac => '3-(2-methylpropyl)-1-propyl-1,4,9-triazaspiro[5.5]undecane-2,5-dione' }, # PubChem 9856956 { smiles => 'C1CCOC2(C1)CC(=O)CCO2', iupac => '1,7-dioxaspiro[5.5]undecan-4-one' }, # PubChem 11217490 { smiles => 'C1CCC2(C1)CCOC(=O)C2', iupac => '8-oxaspiro[4.5]decan-9-one', AUTHOR => 1 }, # PubChem 12733330 { smiles => 'O1CCOC12CCC(CC2)C(C(C)C)=O', iupac => '1-(1,4-dioxaspiro[4.5]decan-8-yl)-2-methylpropan-1-one' }, # PubChem 13353114 # This might not be a real compound, nevertheless, locants should probably not be added to it { smiles => 'FC1(C(C(C(C(C12C(C(C(C2(F)F)(F)F)(F)F)(F)F)(F)F)(F)F)(F)F)(F)F)F', iupac => 'octadecafluorospiro[4.5]decane', AUTHOR => 1 }, ); @cases = grep { !exists $_->{AUTHOR} } @cases unless $ENV{AUTHOR_TESTING}; plan skip_all => 'No available cases' unless @cases; plan tests => scalar @cases; for my $case (@cases) { my $ok; eval { $ok = is ChemOnomatopist::get_name( $case->{smiles} ), $case->{iupac}, $case->{smiles} }; $@ =~ s/\n$// if $@; fail $case->{smiles} . ": $@" if $@; diag 'test supposed to fail with AUTHOR_TESTING' if $case->{AUTHOR} && $ok; } ChemOnomatopist-0.10.0/t/27_bicycle.t000066400000000000000000000122371463750375500172770ustar00rootroot00000000000000#!/usr/bin/perl use strict; use warnings; use ChemOnomatopist; use Test::More; my @cases = ( # From BBv2 P-25.0 { smiles => 'C1=CC=CC2=CC=CC=C12', iupac => 'naphthalene' }, { smiles => 'C=12C=CC=CC=1C=CC=C2', iupac => 'naphthalene' }, # From BBv2 P-25.2.1 { smiles => 'N1=CN=CC2=NC=CN=C12', iupac => 'pteridine' }, { smiles => 'N1=CN=C2N=CNC2=C1', iupac => '7H-purine' }, { smiles => 'N1N=CC2=CC=CC=C12', iupac => '1H-indazole' }, { smiles => 'C12=C(C=CN2)C=CC=C1', iupac => '1H-indole' }, { smiles => 'O1CC=CC2=C1C=CC=C2', iupac => '2H-1-benzopyran' }, { smiles => 'C1OC=CC2=C1C=CC=C2', iupac => '1H-2-benzopyran' }, { smiles => 'C1[Se]C=CC2=C1C=CC=C2', iupac => '1H-2-benzoselenopyran' }, { smiles => 'N1=CC=CC2=CC=CC=C12', iupac => 'quinoline' }, # From BBv3 P-25.2.1, Table 2.9 { smiles => 'C1=CC=CC2=NC3=CC=CC=C3C=C12', iupac => 'acridine' }, { smiles => 'C1=CC=CC2=[As]C3=CC=CC=C3C=C12', iupac => 'acridarsine' }, { smiles => 'C1=CC=CC2=PC3=CC=CC=C3C=C12', iupac => 'acridophosphine' }, { smiles => 'N1C=CC2=CC=CC=C12', iupac => '1H-indole' }, { smiles => '[AsH]1C=CC2=CC=CC=C12', iupac => 'arsindole' }, { smiles => 'P1C=CC2=CC=CC=C12', iupac => 'phosphindole' }, # Not sure if H prefix is not needed { smiles => 'C=1C=CN2C=CC=CC12', iupac => 'indolizine' }, { smiles => 'C=1C=C[As]2C=CC=CC12', iupac => 'arsindolizine' }, { smiles => 'C=1C=CP2C=CC=CC12', iupac => 'phosphindolizine' }, { smiles => 'C=1NC=C2C=CC=CC12', iupac => '2H-isoindole' }, { smiles => 'C=1[AsH]C=C2C=CC=CC12', iupac => 'isoarsindole' }, { smiles => 'C=1PC=C2C=CC=CC12', iupac => 'isophosphindole' }, { smiles => 'C1=[As]C=CC2=CC=CC=C12', iupac => 'isoarsinoline' }, { smiles => 'C1=PC=CC2=CC=CC=C12', iupac => 'isophosphinoline' }, { smiles => 'C1=CC=CC2=NC=C3C=CC=CC3=C12', iupac => 'phenanthridine' }, { smiles => 'C1=CC=CC2=[As]C=C3C=CC=CC3=C12', iupac => 'arsanthridine' }, { smiles => 'C1=CC=CC2=PC=C3C=CC=CC3=C12', iupac => 'phosphanthridine' }, { smiles => '[As]1=CC=CC2=CC=CC=C12', iupac => 'arsinoline' }, { smiles => 'P1=CC=CC2=CC=CC=C12', iupac => 'phosphinoline' }, { smiles => 'C=1C=CC[As]2C=CC=CC12', iupac => '4H-arsinolizine' }, { smiles => 'C=1C=CCP2C=CC=CC12', iupac => '4H-phosphinolizine' }, { smiles => 'C1=CC=C2C(=C1)C(=C(P2(Cl)(Cl)Cl)Cl)Cl', iupac => '1,1,1,2,3-pentachlorophosphindole', AUTHOR => 1 }, # PubChem 2784508, why λ5 is not present? { smiles => 'C1=CC=C2C(C(C=CC2=C1)O)O', iupac => '1,2-dihydronaphthalene-1,2-diol' }, # PubChem 362 { smiles => 'C12(C(C(C(C(C1(F)F)(F)F)(F)F)(F)F)(C(C(C(C2(F)F)(F)F)(F)F)(F)F)F)F', iupac => '1,1,2,2,3,3,4,4,4a,5,5,6,6,7,7,8,8,8a-octadecafluoronaphthalene' }, # PubChem 9386 { smiles => 'C1=CC=CC=CC2=CC=CC=CC=C12', iupac => 'octalene' }, # From BBv2 P-25.1.2.3 # From BBv2 P-25.2.2.4 { smiles => 'C1=COC=CC2=C1C=CC=C2', iupac => '3-benzoxepine' }, { smiles => 'O1C=CC2=C1C=CC=C2', iupac => '1-benzofuran' }, { smiles => 'C=1OC=C2C1C=CC=C2', iupac => '2-benzofuran' }, { smiles => 'C1=CC=COC=CC=CC=COC=CC=CC2=C1C=CC=C2', iupac => '5,12-benzodioxacyclooctadecine', AUTHOR => 1 }, { smiles => 'C1=C(C=CC2=CC=CC=C12)CCCO', iupac => '3-(naphthalen-2-yl)propan-1-ol' }, # From P-15.6.1.2 # From BBv3 P-44.4.1.5 { smiles => '[SiH2]1O[SiH]=CC2=C1C=CC=C2', iupac => '1H-2,1,3-benzoxadisiline' }, { smiles => '[SiH2]1OC=[SiH]C2=C1C=CC=C2', iupac => '1H-2,1,4-benzoxadisiline' }, { smiles => 'C1=CC2=C(C=C1O)C(=CN2)CCN', iupac => '3-(2-aminoethyl)-1H-indol-5-ol' }, # serotonin # Guanine { smiles => 'O=C1c2ncnc2nc(N)N1', iupac => '2-amino-1,9-dihydro-6H-purin-6-one', AUTHOR => 1 }, # FIXME: fails due to aromaticity { smiles => 'NC=1NC(C=2N=CNC2N1)=O', iupac => '2-amino-1,9-dihydro-6H-purin-6-one', AUTHOR => 1 }, { smiles => 'N1C(N)=NC=2N=CNC2C1=O', iupac => '2-amino-1,9-dihydro-6H-purin-6-one', AUTHOR => 1 }, { smiles => 'CCN1C=NC2=C(N=CN=C21)N', iupac => '9-ethylpurin-6-amine' }, # PubChem 7 { smiles => 'CCCCCNC1=C(C=NC2=CC=CC=C21)[N+](=O)[O-]', iupac => '3-nitro-N-pentylquinolin-4-amine' }, # PubChem 21355672 { smiles => 'CC1=C(C2=C(C=C1)C=C(S2)O)CO', iupac => '7-(hydroxymethyl)-6-methyl-1-benzothiophen-2-ol' }, # PubChem 130803523 { smiles => 'COC1=C(C2=C(C=C1)SC(=N2)N)Br', iupac => '4-bromo-5-methoxy-1,3-benzothiazol-2-amine' }, # PubChem 131985781 { smiles => 'C1=CC=C2C(=C1)N=CS2', iupac => '1,3-benzothiazole' }, # PubChem 7222 { smiles => 'C1=CC=C2C(=C1)C=NS2', iupac => '1,2-benzothiazole' }, # PubChem 9225 { smiles => 'C1=CC2=CSN=C2C=C1', iupac => '2,1-benzothiazole' }, # PubChem 638008 { smiles => 'C1(=CC=C(C=2C(=CC=C(C12)C(=O)O)C(=O)O)C(=O)O)C(=O)O', iupac => 'naphthalene-1,4,5,8-tetracarboxylic acid' }, ); @cases = grep { !exists $_->{AUTHOR} } @cases unless $ENV{AUTHOR_TESTING}; plan skip_all => 'No available cases' unless @cases; plan tests => scalar @cases; for my $case (@cases) { my $ok; eval { $ok = is ChemOnomatopist::get_name( $case->{smiles} ), $case->{iupac}, $case->{smiles} }; $@ =~ s/\n$// if $@; fail $case->{smiles} . ": $@" if $@; diag 'test supposed to fail with AUTHOR_TESTING' if $case->{AUTHOR} && $ok; } ChemOnomatopist-0.10.0/t/28_cyanide.t000066400000000000000000000024751463750375500173050ustar00rootroot00000000000000#!/usr/bin/perl use strict; use warnings; use ChemOnomatopist; use Test::More; my @cases = ( # From BBv2 P-66.5.1.1.1 { smiles => 'C(CCCCC)#N', iupac => 'hexanenitrile', AUTHOR => 1 }, { smiles => 'C(CCCC#N)#N', iupac => 'pentanedinitrile', AUTHOR => 1 }, { smiles => 'C(CCC)(C#N)(C#N)C#N', iupac => 'butane-1,1,1-tricarbonitrile' }, # BBv2 P-66.5.1.1.2 # From BBv2 P-66.5.1.1.3 { smiles => '[SiH3]C#N', iupac => 'silanecarbonitrile' }, { smiles => 'C1(CCCCC1)C#N', iupac => 'cyclohexanecarbonitrile' }, { smiles => 'N1(CCCCC1)C#N', iupac => 'piperidine-1-carbonitrile' }, # From BBv2 P-66.5.1.1.4 { smiles => 'C(#N)C1=CC=C(O1)C(=O)O', iupac => '5-cyanofuran-2-carboxylic acid' }, { smiles => 'C(#N)CCC(=O)O', iupac => '3-cyanopropanoic acid' }, { smiles => 'C(#N)CC(CCC#N)CCC#N', iupac => '4-(cyanomethyl)heptanedinitrile', AUTHOR => 1 }, ); @cases = grep { !exists $_->{AUTHOR} } @cases unless $ENV{AUTHOR_TESTING}; plan skip_all => 'No available cases' unless @cases; plan tests => scalar @cases; for my $case (@cases) { my $ok; eval { $ok = is ChemOnomatopist::get_name( $case->{smiles} ), $case->{iupac}, $case->{smiles} }; $@ =~ s/\n$// if $@; fail $case->{smiles} . ": $@" if $@; diag 'test supposed to fail with AUTHOR_TESTING' if $case->{AUTHOR} && $ok; } ChemOnomatopist-0.10.0/t/29_nonfused_cycles.t000066400000000000000000000061121463750375500210450ustar00rootroot00000000000000#!/usr/bin/perl use strict; use warnings; use ChemOnomatopist; use Test::More; my @cases = ( { smiles => 'ClC1=NC=CC(=C1)OC1=NC=C(C=C1)Cl', iupac => '2-chloro-4-[(5-chloropyridin-2-yl)oxy]pyridine', AUTHOR => 1 }, # BBv2 P-15.3.2.4.2 { smiles => 'C(C1=CC=CC=C1)C1=NC=CC=C1', iupac => '2-benzylpyridine' }, # BBv2 P-29.6.1 { smiles => 'SC1=CC=C(C=C1)SSC=1C=C(C=CC1)S', iupac => '3-[(4-sulfanylphenyl)disulfanyl]benzene-1-thiol' }, # BBv2 P-63.1.5 { smiles => 'C1(=CC=CC=C1)SC1CCNCC1', iupac => '4-(phenylsulfanyl)piperidine' }, # BBv2 P-63.2.5 # From BBv2 P-63.2.4.2 { smiles => 'C1(CCCCC1)OC1=CC=CC=C1', iupac => '(cyclohexyloxy)benzene', AUTHOR => 1 }, # FIXME: Very close { smiles => 'N1=CC(=CC=C1)OC1=NC=CN=C1', iupac => '2-[(pyridin-3-yl)oxy]pyrazine' }, { smiles => 'c1ncccc1[C@@H]2CCCN2C', iupac => '3-[(2S)-1-methylpyrrolidin-2-yl]pyridine', AUTHOR => 1 }, # nicotine { smiles => 'C1=CSC(=C1)SSC2=CC=CS2', iupac => '2-(thiophen-2-yldisulfanyl)thiophene' }, # PubChem 23347 { smiles => 'CC1C(=O)NC(C(=O)N1C(C)C(C)C)C2CCCCC2', iupac => '3-cyclohexyl-6-methyl-1-(3-methylbutan-2-yl)piperazine-2,5-dione' }, # PubChem 64959818 { smiles => 'C1=CC2=C(C=CC(=C2)C(C3=NC=NC=C3)N)N=C1', iupac => 'pyrimidin-4-yl(quinolin-6-yl)methanamine', AUTHOR => 1 }, # PubChem 81231965 { smiles => 'C1OC2=C(O1)C=C(C=C2)C3=C(N=CS3)CN', iupac => '[5-(1,3-benzodioxol-5-yl)-1,3-thiazol-4-yl]methanamine', AUTHOR => 1 }, # PubChem 84144019 { smiles => 'C1=CC(=C(N=C1)C2=NC(=NS2)N)Br', iupac => '5-(3-bromopyridin-2-yl)-1,2,4-thiadiazol-3-amine' }, # PubChem 107526369 { smiles => 'C1=CC(=NC(=C1N)C2=CC(=C(C=C2Cl)Cl)Cl)C(=O)O', iupac => '5-amino-6-(2,4,5-trichlorophenyl)pyridine-2-carboxylic acid' }, # PubChem 133086582 { smiles => 'CC1CCC(CC(C1)C)C2CCCCC2', iupac => '1-cyclohexyl-3,5-dimethylcycloheptane' }, # PubChem 149225482 { smiles => 'CC1=CC(=C(C=C1)C(C)(C)C2=CC(=C(C=C2)C)C)C', iupac => '1-[2-(3,4-dimethylphenyl)propan-2-yl]-2,4-dimethylbenzene' }, # PubChem 54559144 { smiles => 'CCOC1=C(C(=CC(=C1)CNCC2=NN=C(N2C)C)Cl)OC', iupac => '1-(3-chloro-5-ethoxy-4-methoxyphenyl)-N-[(4,5-dimethyl-1,2,4-triazol-3-yl)methyl]methanamine', AUTHOR => 1 }, # PubChem 56822512 # incorrectly selected parent chain { smiles => 'C1CC1=CC2=CC=CC=C2Cl', iupac => '1-chloro-2-(cyclopropylidenemethyl)benzene' }, # PubChem 54594307 { smiles => 'COC1=NN=C(C=C1C(=O)O)C2=CC=CC=N2', iupac => '3-methoxy-6-pyridin-2-ylpyridazine-4-carboxylic acid', AUTHOR => 1 }, # PubChem 117127049 # differs in brackets { smiles => 'C1=CC=C2C(=C1)N=C(C(=N2)SC3=CC=CC=N3)SC4=CC=CC=N4', iupac => '2,3-bis(pyridin-2-ylsulfanyl)quinoxaline' }, # PubChem 16044110 ); @cases = grep { !exists $_->{AUTHOR} } @cases unless $ENV{AUTHOR_TESTING}; plan skip_all => 'No available cases' unless @cases; plan tests => scalar @cases; for my $case (@cases) { my $ok; eval { $ok = is ChemOnomatopist::get_name( $case->{smiles} ), $case->{iupac}, $case->{smiles} }; $@ =~ s/\n$// if $@; fail $case->{smiles} . ": $@" if $@; diag 'test supposed to fail with AUTHOR_TESTING' if $case->{AUTHOR} && $ok; } ChemOnomatopist-0.10.0/t/30_guanidine.t000066400000000000000000000020361463750375500176160ustar00rootroot00000000000000#!/usr/bin/perl use strict; use warnings; use ChemOnomatopist; use Test::More; my @cases = ( { smiles => 'NC(=N)N', iupac => 'guanidine' }, # From BBv2 P-66.4.1.2.1.2 { smiles => 'CN(C(=NC1=CC=CC=C1)N(C)C)C', iupac => "N,N,N',N'-tetramethyl-N''-phenylguanidine" }, { smiles => 'CN(C(=N)NC)C', iupac => "N,N,N'-trimethylguanidine" }, { smiles => 'NC(N)=NCCCC(=O)O', iupac => '4-[(diaminomethylidene)amino]butanoic acid' }, # From BBv2 P-66.4.1.2.1.3 { smiles => 'NC(C(=O)O)CCCNC(N)=N', iupac => '2-amino-5-(carbamimidoylamino)pentanoic acid' }, # From BBv2 P-103.1.1.1, arginine ); @cases = grep { !exists $_->{AUTHOR} } @cases unless $ENV{AUTHOR_TESTING}; plan skip_all => 'No available cases' unless @cases; plan tests => scalar @cases; for my $case (@cases) { my $ok; eval { $ok = is ChemOnomatopist::get_name( $case->{smiles} ), $case->{iupac}, $case->{smiles} }; $@ =~ s/\n$// if $@; fail $case->{smiles} . ": $@" if $@; diag 'test supposed to fail with AUTHOR_TESTING' if $case->{AUTHOR} && $ok; } ChemOnomatopist-0.10.0/t/31_ethers.t000066400000000000000000000023631463750375500171510ustar00rootroot00000000000000#!/usr/bin/perl use strict; use warnings; use ChemOnomatopist; use Test::More; my @cases = ( # From BBv2 P-63.2.4.1 { smiles => 'COC', iupac => 'methoxymethane' }, { smiles => 'CCOC', iupac => 'methoxyethane' }, { smiles => 'C1(=CC=CC=C1)OC', iupac => 'anisole' }, { smiles => 'COC1=CC2=CC=CC=C2C=C1', iupac => '2-methoxynaphthalene' }, { smiles => 'ClCCOCC', iupac => '1-chloro-2-ethoxyethane', AUTHOR => 1 }, { smiles => 'COCCOC', iupac => '1,2-dimethoxyethane', AUTHOR => 1 }, { smiles => 'COCCOCCOC', iupac => '1-methoxy-2-(2-methoxyethoxy)ethane', AUTHOR => 1 }, # From P-63.2.5 { smiles => 'CSC1=CC=CC=C1', iupac => '(methylsulfanyl)benzene', AUTHOR => 1 }, { smiles => 'ClC1=CC=C(C=C1)[Se]CCl', iupac => '1-chloro-4-[(chloromethyl)selanyl]benzene', AUTHOR => 1 }, ); @cases = grep { !exists $_->{AUTHOR} } @cases unless $ENV{AUTHOR_TESTING}; plan skip_all => 'No available cases' unless @cases; plan tests => scalar @cases; for my $case (@cases) { my $ok; eval { $ok = is ChemOnomatopist::get_name( $case->{smiles} ), $case->{iupac}, $case->{smiles} }; $@ =~ s/\n$// if $@; fail $case->{smiles} . ": $@" if $@; diag 'test supposed to fail with AUTHOR_TESTING' if $case->{AUTHOR} && $ok; } ChemOnomatopist-0.10.0/t/32_amides.t000066400000000000000000000126261463750375500171250ustar00rootroot00000000000000#!/usr/bin/perl use strict; use warnings; use ChemOnomatopist; use Test::More; my @cases = ( { smiles => 'C(C)N(C(=O)C=1OC=CC1)CC', iupac => 'N,N-diethylfuran-2-carboxamide' }, # From BBv2 P-16.2.1 { smiles => 'N(=C=O)N[N+](=O)[O-]', iupac => 'isocyanatonitramide' }, # From BBv3 P-58.3.2 # From BBv2 P-62.2.3 { smiles => 'N(C1=CC=CC=C1)C=1C=C(C(=O)O)C=CC1', iupac => '3-anilinobenzoic acid' }, { smiles => 'CN(C1=CC=CC=C1)C=1C=C(C=CC1)O', iupac => '3-(N-methylanilino)phenol' }, { smiles => 'S(S)C=1C=C(C(=O)N)C=CC1SS', iupac => '3,4-bis(disulfanyl)benzamide' }, # From BBv2 P-63.4.2.2 # From BBv2 P-65.1.1.3.2 { smiles => 'ONC(CC)=O', iupac => 'N-hydroxypropanamide' }, { smiles => 'ONC(=O)C1CCCCC1', iupac => 'N-hydroxycyclohexanecarboxamide' }, # From BBv2 P-66.1.1.1.1.1 { smiles => 'C(CCCCC)(=O)N', iupac => 'hexanamide' }, { smiles => 'C(CCCC(=O)N)(=O)N', iupac => 'pentanediamide', AUTHOR => 1 }, { smiles => 'C(C(CC(=O)N)C(=O)N)C(=O)N', iupac => 'propane-1,2,3-tricarboxamide', AUTHOR => 1 }, # BBv2 P-66.1.1.1.1.2 # From BBv2 P-66.1.1.1.1.3 { smiles => 'PC(=O)N', iupac => 'phosphanecarboxamide', AUTHOR => 1 }, { smiles => 'N(N)C(=O)N', iupac => 'hydrazinecarboxamide', AUTHOR => 1 }, { smiles => 'S1C(=CC=C1)C(=O)N', iupac => 'thiophene-2-carboxamide' }, { smiles => 'N1(CCCCC1)C(=O)N', iupac => 'piperidine-1-carboxamide' }, { smiles => 'C(C1=CC=CC=C1)(=O)N', iupac => 'benzamide' }, # BBv2 P-66.1.1.1.2.1 # From BBv2 P-66.1.1.1.2.4 { smiles => 'C(C=C)(=O)N', iupac => 'prop-2-enamide' }, { smiles => 'OC(C(=O)N)C', iupac => '2-hydroxypropanamide' }, # From BBv2 P-66.1.1.3.1.1 { smiles => 'CNC(C1=CC=CC=C1)=O', iupac => 'N-methylbenzamide' }, { smiles => 'C(C)N(C(=O)C=1OC=CC1)CC', iupac => 'N,N-diethylfuran-2-carboxamide' }, # From BBv2 P-66.1.1.3.4 { smiles => 'C1(=CC=CC=C1)NC=O', iupac => 'N-phenylformamide' }, { smiles => 'C1(=CC=CC=C1)NC(C)=O', iupac => 'N-phenylacetamide' }, { smiles => 'C1(=CC=CC=C1)NC(CCCCC)=O', iupac => 'N-phenylhexanamide' }, { smiles => 'CN(C(C1=CC=CC=C1)=O)C1=CC=CC=C1', iupac => 'N-methyl-N-phenylbenzamide' }, { smiles => 'CN(C(C1=CC=C(C=C1)C)=O)C1=CC(=CC=C1)C', iupac => 'N,4-dimethyl-N-(3-methylphenyl)benzamide' }, # From BBv2 P-66.1.1.3.5 { smiles => 'N1=CC=C(C=C1)C1=CC=C(C(=O)N)C=C1', iupac => '4-(pyridin-4-yl)benzamide' }, { smiles => 'ClC1=NC=CC=C1C(=O)N', iupac => '2-chloropyridine-3-carboxamide' }, { smiles => 'CC=1C=C(C(=CC1)C(=O)N)C(=O)N', iupac => '4-methylbenzene-1,2-dicarboxamide', AUTHOR => 1 }, { smiles => 'OC1=C(C(=O)N)C=CC=C1', iupac => '2-hydroxybenzamide' }, { smiles => 'NC=1C(=NC(=C(N1)N)Cl)C(=O)N', iupac => '3,5-diamino-6-chloropyrazine-2-carboxamide' }, { smiles => 'C(CNC1C(CCCC1)C(=O)N)NC1C(CCCC1)C(=O)N', iupac => '2,2\'-[ethane-1,2-diylbis(azanediyl)]di(cyclohexane-1-carboxamide)', AUTHOR => 1 }, { smiles => 'C(C1=CC=CC=C1)(=O)NC1=CC=C(C=C1)S(=O)(=O)O', iupac => '4-benzamidobenzene-1-sulfonic acid' }, # BBv2 P-66.1.1.4.3 # From BBv2 P-66.1.3 { smiles => 'N1(CCCCC1)C(C)=O', iupac => '1-(piperidin-1-yl)ethan-1-one' }, { smiles => 'N1(CCCC2=CC=CC=C12)C(CC)=O', iupac => '1-(3,4-dihydroquinolin-1(2H)-yl)propan-1-one', AUTHOR => 1 }, # From BBv2 P-66.1.4.1.1 { smiles => 'C(N)=S', iupac => 'methanethioamide' }, { smiles => 'C(C)(N)=S', iupac => 'ethanethioamide' }, { smiles => 'C1(=CC=CC=C1)C(N)=S', iupac => 'benzenecarbothioamide', AUTHOR => 1 }, { smiles => 'C(CCC(N)=S)(N)=S', iupac => 'butanedithioamide', AUTHOR => 1 }, { smiles => 'N1=C(C=CC=C1)C(N)=S', iupac => 'pyridine-2-carbothioamide' }, { smiles => 'C1=C(C=CC2=CC=CC=C12)S(N)(=S)=S', iupac => 'naphthalene-2-sulfonodithioamide', AUTHOR => 1 }, { smiles => 'N1=CC=C(C=C1)C(N)=S', iupac => 'pyridine-4-carbothioamide' }, { smiles => 'CCC(=O)N(C1CCN(CC1)CCC2=CC=CC=C2)C3=CC=CC=C3', iupac => 'N-phenyl-N-[1-(2-phenylethyl)piperidin-4-yl]propanamide' }, # PubChem 3345 { smiles => 'CC(CCOC)NC(=O)CCCCCCN', iupac => '7-amino-N-(4-methoxybutan-2-yl)heptanamide', AUTHOR => 1 }, # PubChem 64604850 { smiles => 'C1CCN(CC1)CCCCC(=O)N=C(CC(=N)C2=CC=NC=C2)N', iupac => 'N-(1-amino-3-imino-3-pyridin-4-ylpropylidene)-5-piperidin-1-ylpentanamide', AUTHOR => 1 }, # PubChem 90937303 { smiles => 'C1CCC(CC1)N2C=C(C(=N2)C(=O)N)N', iupac => '4-amino-1-cyclohexylpyrazole-3-carboxamide' }, # PubChem 107345270 { smiles => 'CC1(CC1(Cl)Cl)C(=O)NC2=CC=CC(=C2)C3=NN=CO3', iupac => '2,2-dichloro-1-methyl-N-[3-(1,3,4-oxadiazol-2-yl)phenyl]cyclopropane-1-carboxamide' }, # PubChem 43061667 { smiles => 'C1CC1N(CC2=CN=CC=C2)C(=O)C3CSCN3', iupac => 'N-cyclopropyl-N-(pyridin-3-ylmethyl)-1,3-thiazolidine-4-carboxamide' }, # PubChem 54869854 { smiles => 'C1=CC=C(C=C1)NC(=O)CS(=O)C2=CC=CC=C2', iupac => '2-(benzenesulfinyl)-N-phenylacetamide' }, # PubChem 13478044 { smiles => 'C(C)(=O)N(C(C1=CC=CC=C1)=O)C(CCCl)=O', iupac => 'N-acetyl-N-(3-chloropropanoyl)benzamide', AUTHOR => 1 }, # BBv2 P-66.1.2.1 ); @cases = grep { !exists $_->{AUTHOR} } @cases unless $ENV{AUTHOR_TESTING}; plan skip_all => 'No available cases' unless @cases; plan tests => scalar @cases; for my $case (@cases) { my $ok; eval { $ok = is ChemOnomatopist::get_name( $case->{smiles} ), $case->{iupac}, $case->{smiles} }; $@ =~ s/\n$// if $@; fail $case->{smiles} . ": $@" if $@; diag 'test supposed to fail with AUTHOR_TESTING' if $case->{AUTHOR} && $ok; } ChemOnomatopist-0.10.0/t/33_hydrazines.t000066400000000000000000000050071463750375500200370ustar00rootroot00000000000000#!/usr/bin/perl use strict; use warnings; use ChemOnomatopist; use Test::More; my @cases = ( # From BBv3 P-66.3.2.1 { smiles => 'N(N)C(=O)O', iupac => 'hydrazinecarboxylic acid', AUTHOR => 1 }, { smiles => 'N(N)S(=O)(=O)CC(=O)O', iupac => '(hydrazinesulfonyl)acetic acid' }, { smiles => 'N(N)S(=O)C=1C(=CC2=CC=CC=C2C1)C(=O)O', iupac => '3-(hydrazinesulfinyl)naphthalene-2-carboxylic acid' }, { smiles => 'N(N)C(=O)C1=C(C=CC=C1)S(=O)(=O)O', iupac => '2-(hydrazinecarbonyl)benzene-1-sulfonic acid', AUTHOR => 1 }, # From BBv2 P-68.3.1.2.1 { smiles => 'CN(N)C', iupac => '1,1-dimethylhydrazine' }, { smiles => 'C1(=CC=CC=C1)NN', iupac => 'phenylhydrazine' }, { smiles => 'N(N)CN' => iupac => '1-hydrazinylmethanamine' }, { smiles => 'N(N)C(=O)O', iupac => 'hydrazinecarboxylic acid', AUTHOR => 1 }, { smiles => 'FN(N(F)F)F', iupac => 'tetrafluorohydrazine' }, { smiles => 'N(N)CC#N', iupac => 'hydrazinylacetonitrile', AUTHOR => 1 }, # From BBv3 P-68.3.1.2.2 { smiles => 'C(CC)=NN', iupac => 'propylidenehydrazine' }, { smiles => 'CN(N=C(C)C)C', iupac => '1,1-dimethyl-2-(propan-2-ylidene)hydrazine' }, { smiles => 'C(C=NNC1=CC=CC=C1)=NNC1=CC=CC=C1', iupac => '1,1\'-ethanediylidenebis(2-phenylhydrazine)', AUTHOR => 1 }, { smiles => 'CC(C)=NNC1=C(C(=O)O)C=CC=C1', iupac => '2-[(propan-2-ylidene)hydrazinyl]benzoic acid', AUTHOR => 1 }, { smiles => 'C1(=CC=CC=C1)NN=C1CCC(CC1)C(=O)O', iupac => '4-(phenylhydrazinylidene)cyclohexane-1-carboxylic acid' }, # From BBv3 P-68.3.1.3.5.1 { smiles => 'C1(=CC=CC=C1)N=NC(C(C)=O)=NNC1=CC=CC=C1', iupac => '1-(phenyldiazenyl)-1-(phenylhydrazinylidene)propan-2-one' }, { smiles => 'C1(=CC=CC=C1)C(C(=NNC1=CC=CC=C1)N=NC1=CC=CC=C1)=O', iupac => '1-phenyl-2-(phenyldiazenyl)-2-(phenylhydrazinylidene)ethan-1-one' }, { smiles => 'C1(=CC=CC=C1)N=NC=NNC(C)=O', iupac => 'N\'-[(phenyldiazenyl)methylidene]acetohydrazide', AUTHOR => 1 }, { smiles => 'C1(=CC=CC=C1)N=NC(CC(=O)O)=NNC1=CC=CC=C1', iupac => '3-(phenyldiazenyl)-3-(phenylhydrazinylidene)propanoic acid' }, # From BBv3 P-68.3.1.3.5.2 ); @cases = grep { !exists $_->{AUTHOR} } @cases unless $ENV{AUTHOR_TESTING}; plan skip_all => 'No available cases' unless @cases; plan tests => scalar @cases; for my $case (@cases) { my $ok; eval { $ok = is ChemOnomatopist::get_name( $case->{smiles} ), $case->{iupac}, $case->{smiles} }; $@ =~ s/\n$// if $@; fail $case->{smiles} . ": $@" if $@; diag 'test supposed to fail with AUTHOR_TESTING' if $case->{AUTHOR} && $ok; } ChemOnomatopist-0.10.0/t/34_sulfoxides.t000066400000000000000000000015431463750375500200460ustar00rootroot00000000000000#!/usr/bin/perl use strict; use warnings; use ChemOnomatopist; use Test::More; my @cases = ( # From BBv2 P-63.6 { smiles => 'C(C)S(=O)CCCC', iupac => '1-(ethanesulfinyl)butane', AUTHOR => 1 }, { smiles => 'C(C)[Se](=O)C1=CC=CC=C1', iupac => '(ethaneseleninyl)benzene' }, { smiles => 'C1(CCCCC1)S(=O)N1CC(OCC1)C(=O)O', iupac => '4-(cyclohexanesulfinyl)morpholine-2-carboxylic acid' }, # BBv2 P-65.4.1 ); @cases = grep { !exists $_->{AUTHOR} } @cases unless $ENV{AUTHOR_TESTING}; plan skip_all => 'No available cases' unless @cases; plan tests => scalar @cases; for my $case (@cases) { my $ok; eval { $ok = is ChemOnomatopist::get_name( $case->{smiles} ), $case->{iupac}, $case->{smiles} }; $@ =~ s/\n$// if $@; fail $case->{smiles} . ": $@" if $@; diag 'test supposed to fail with AUTHOR_TESTING' if $case->{AUTHOR} && $ok; } ChemOnomatopist-0.10.0/t/35_acids.t000066400000000000000000000023141463750375500167420ustar00rootroot00000000000000#!/usr/bin/perl use strict; use warnings; use ChemOnomatopist; use Test::More; my @cases = ( { smiles => 'C(O)S(=O)O', iupac => 'hydroxymethanesulfinic acid' }, # PubChem 9000 { smiles => 'S(=O)(O)CC(=O)O', iupac => 'sulfinoacetic acid' }, # From BBv2 P-65.3.2.1 # From BBv3 P-65.3.1 { smiles => 'C1(=CC=CC=C1)S(=O)(=O)O', iupac => 'benzenesulfonic acid' }, { smiles => 'CC(CC)S(=O)O', iupac => 'butane-2-sulfinic acid' }, { smiles => 'CC1=C(C=C(C=C1)S(=O)(=O)O)S(=O)(=O)O', iupac => '4-methylbenzene-1,3-disulfonic acid' }, { smiles => 'C=1(C(=CC=C2C=CC=CC12)S(=O)(=O)O)C1=CC2=CC=CC=C2C=C1', iupac => '[1,2′-binaphthalene]-2-sulfonic acid', AUTHOR => 1 }, { smiles => 'NC1=CC=C(C=C1)S(=O)(=O)O', iupac => '4-aminobenzene-1-sulfonic acid' }, ); @cases = grep { !exists $_->{AUTHOR} } @cases unless $ENV{AUTHOR_TESTING}; plan skip_all => 'No available cases' unless @cases; plan tests => scalar @cases; for my $case (@cases) { my $ok; eval { $ok = is ChemOnomatopist::get_name( $case->{smiles} ), $case->{iupac}, $case->{smiles} }; $@ =~ s/\n$// if $@; fail $case->{smiles} . ": $@" if $@; diag 'test supposed to fail with AUTHOR_TESTING' if $case->{AUTHOR} && $ok; } ChemOnomatopist-0.10.0/t/36_acyl_halides.t000066400000000000000000000021501463750375500202770ustar00rootroot00000000000000#!/usr/bin/perl use strict; use warnings; use ChemOnomatopist; use Test::More; my @cases = ( # From BBv2 P-65.5.1 { smiles => 'C(CCCCC)(=O)F', iupac => 'hexanoyl fluoride' }, { smiles => 'C(CC(=O)Cl)(=O)Cl', iupac => 'propanedioyl dichloride', AUTHOR => 1 }, { smiles => 'C1(=CC=C(C=C1)C(=O)Cl)C(=O)Cl', iupac => 'benzene-1,4-dicarbonyl dichloride', AUTHOR => 1 }, { smiles => 'C(CCC(=O)Cl)(=O)Br', iupac => 'butanedioyl bromide chloride', AUTHOR => 1 }, { smiles => 'C(CC)(=O)Br', iupac => 'propanoyl bromide' }, # From BBv3 P-68.5.0 { smiles => 'C(C(=O)F)(C(F)(F)F)O', iupac => '3,3,3-trifluoro-2-hydroxypropanoyl fluoride' }, # PubChem 53938350 ); @cases = grep { !exists $_->{AUTHOR} } @cases unless $ENV{AUTHOR_TESTING}; plan skip_all => 'No available cases' unless @cases; plan tests => scalar @cases; for my $case (@cases) { my $ok; eval { $ok = is ChemOnomatopist::get_name( $case->{smiles} ), $case->{iupac}, $case->{smiles} }; $@ =~ s/\n$// if $@; fail $case->{smiles} . ": $@" if $@; diag 'test supposed to fail with AUTHOR_TESTING' if $case->{AUTHOR} && $ok; } ChemOnomatopist-0.10.0/t/37_fusion.t000066400000000000000000000046701463750375500171730ustar00rootroot00000000000000#!/usr/bin/perl use strict; use warnings; use ChemOnomatopist; use Test::More; my @cases = ( # From BBv2 P-25.3.1.3 { smiles => '[Se]1C=CC2=C1[Se]C=C2', iupac => 'selenopheno[2,3-b]selenophene' }, { smiles => '[Se]1C=2C(C=C1)=C[Se]C2', iupac => 'selenopheno[3,4-b]selenophene', AUTHOR => 1 }, { smiles => '[Se]1C2=C(C=C1)[Se]C=C2', iupac => 'selenopheno[3,2-b]selenophene' }, # From BBv2 P-25.3.2.4 { smiles => 'S1CC=CSC=2C1=COC2', iupac => '2H-[1,4]dithiepino[2,3-c]furan' }, { smiles => 'O1CC=C2OC=CC=C21', iupac => '2H-furo[3,2-b]pyran' }, { smiles => 'N1=CC=CC2=C1C=NOC2', iupac => '5H-pyrido[2,3-d][1,2]oxazine' }, { smiles => 'O1COC2=C1C=CO2', iupac => '2H-furo[2,3-d][1,3]dioxole' }, { smiles => 'O1P=CC2=C1OCO2', iupac => '5H-[1,3]dioxolo[4,5-d][1,2]oxaphosphole' }, { smiles => 'S1C=NC2=C1N=C[Se]2', iupac => '[1,3]selenazolo[5,4-d][1,3]thiazole' }, { smiles => 'O1C2=C(SC=C1)[Se]C=CO2', iupac => '[1,4]oxaselenino[2,3-b][1,4]oxathiine' }, { smiles => 'N1=CC=NC=2C1=CN=NC2', iupac => 'pyrazino[2,3-d]pyridazine' }, { smiles => 'O1SNC2=C1ONS2', iupac => '3H,5H-[1,3,2]oxathiazolo[4,5-d][1,2,3]oxathiazole' }, { smiles => 'S1C=2N(C=C1)C=CN2', iupac => 'imidazo[2,1-b][1,3]thiazole' }, # From BBv2 P-25.3.2.5.1 # From BBv2 P-25.3.3.1.2 { smiles => 'O1C=2C(=CC=C1)C=CC2', iupac => 'cyclopenta[b]pyran' }, { smiles => 'S1COC=2NC=CC21', iupac => '2H,4H-[1,3]oxathiolo[5,4-b]pyrrole' }, { smiles => 'O1C2=C(C=C1)C=CS2', iupac => 'thieno[2,3-b]furan' }, { smiles => 'N1C=NC2=C1C=CS2', iupac => '1H-thieno[2,3-d]imidazole' }, { smiles => 'N=1N2C(N=CC1)=NC=C2', iupac => 'imidazo[1,2-b][1,2,4]triazine' }, { smiles => 'O1COC2=C1N=CN2', iupac => '2H,4H-[1,3]dioxolo[4,5-d]imidazole' }, { smiles => 'CN(C=1C2=C(N=CN1)NC=C2)C', iupac => 'N,N-dimethyl-7H-pyrrolo[2,3-d]pyrimidin-4-amine' }, # PubChem 1861 { smiles => 'C1(=CC=CC=C1)C1=CC2=C(N=CN=C2N)N1C1=CC=CC=C1', iupac => '6,7-diphenylpyrrolo[2,3-d]pyrimidin-4-amine' }, # PubChem 49845043 ); @cases = grep { !exists $_->{AUTHOR} } @cases unless $ENV{AUTHOR_TESTING}; plan skip_all => 'No available cases' unless @cases; plan tests => scalar @cases; for my $case (@cases) { my $ok; eval { $ok = is ChemOnomatopist::get_name( $case->{smiles} ), $case->{iupac}, $case->{smiles} }; $@ =~ s/\n$// if $@; fail $case->{smiles} . ": $@" if $@; diag 'test supposed to fail with AUTHOR_TESTING' if $case->{AUTHOR} && $ok; } ChemOnomatopist-0.10.0/t/38_xanthenes.t000066400000000000000000000040061463750375500176570ustar00rootroot00000000000000#!/usr/bin/perl use strict; use warnings; use ChemOnomatopist; use Test::More; my @cases = ( # From BBv2 P-25.2.1 { smiles => 'C1=CC=CC2=NC3=CC=CC=C3C=C12', iupac => 'acridine' }, { smiles => 'C1=CC=CC=2OC3=CC=CC=C3CC12', iupac => '9H-xanthene' }, # From BBv2 P-25.2.2.2 { smiles => 'C1=CC=CC=2OC3=CC=CC=C3OC12', iupac => 'oxanthrene' }, { smiles => 'C1=CC=CC2=NC3=CC=CC=C3N=C12', iupac => 'phenazine' }, { smiles => 'C1=CC=CC=2OC3=CC=CC=C3C(C12)=O', iupac => '9H-xanthen-9-one' }, # From Wikipedia Xanthone { smiles => 'OC1=CC(=CC=2OC3=CC(=CC(=C3C(C12)=O)C)OC)OC', iupac => '1-hydroxy-3,6-dimethoxy-8-methyl-9H-xanthen-9-one' }, # From Wikipedia Lichexanthone { smiles => 'C1=CC=CC=2SC3=CC=CC=C3C(C12)=O', iupac => '9H-thioxanthen-9-one' }, # From Wikipedia Thioxanthone { smiles => 'CC(C)C1=CC=CC=2C(C3=CC=CC=C3SC12)=O', iupac => '4-(propan-2-yl)-9H-thioxanthen-9-one' }, # From Wikipedia Isopropylthioxanthone { smiles => 'ClC1=CC=2OC3=CC(=C(C=C3OC2C=C1Cl)Cl)Cl', iupac => '2,3,7,8-tetrachlorooxanthrene', AUTHOR => 1 }, # From Wikipedia 2,3,7,8-Tetrachlorodibenzodioxin { smiles => 'ClC=1C(=C(C(=C2OC=3C(=C(C(=C(C3OC12)Cl)Cl)Cl)Cl)Cl)Cl)Cl', iupac => 'octachlorooxanthrene', AUTHOR => 1 }, # From Wikipedia Octachlorodibenzodioxin { smiles => 'ClC1=C(C(=C(C=2OC3=C(C(=C(C=C3OC12)Cl)Cl)Cl)Cl)Cl)Cl', iupac => '1,2,3,4,6,7,8-heptachlorooxanthrene', AUTHOR => 1 }, # From Wikipedia Heptachlorodibenzo-p-dioxin { smiles => 'C1CCN(CC1)CCCC2C3=CC=CC=C3SC4=CC=CC=C24', iupac => '1-[3-(9H-thioxanthen-9-yl)propyl]piperidine' }, # PubChem 155569813 ); @cases = grep { !exists $_->{AUTHOR} } @cases unless $ENV{AUTHOR_TESTING}; plan skip_all => 'No available cases' unless @cases; plan tests => scalar @cases; for my $case (@cases) { my $ok; eval { $ok = is ChemOnomatopist::get_name( $case->{smiles} ), $case->{iupac}, $case->{smiles} }; $@ =~ s/\n$// if $@; fail $case->{smiles} . ": $@" if $@; diag 'test supposed to fail with AUTHOR_TESTING' if $case->{AUTHOR} && $ok; } ChemOnomatopist-0.10.0/t/39_polyacenes.t000066400000000000000000000030561463750375500200310ustar00rootroot00000000000000#!/usr/bin/perl use strict; use warnings; use ChemOnomatopist; use Test::More; my @cases = ( # From BBv2 P-25.1.2.1 { smiles => 'C1=CC=CC2=CC3=CC4=CC=CC=C4C=C3C=C12', iupac => 'tetracene' }, { smiles => 'C1=CC=CC2=CC3=CC4=CC5=CC=CC=C5C=C4C=C3C=C12', iupac => 'pentacene' }, { smiles => 'C1CCCC2CC3CCCCC3CC12', iupac => 'tetradecahydroanthracene' }, # From BBv3 P-31.2.3.3.2 { smiles => 'C12CC(CC(CC1)C2)C2=CC=CC1=CC3=CC=CC(=C3C=C21)C2CC1CCC(C2)C1', iupac => '1,8-di(bicyclo[3.2.1]octan-3-yl)anthracene' }, # From BBv3 P-44.2.1.1 { smiles => 'C1=CC=CC=2C(C3=CC=CC=C3C(C12)=O)=O', iupac => 'anthracene-9,10-dione' }, # From BBv3 P-58.2.2.3 { smiles => 'C1=C(C=CC2=CC=CC=C12)C1=CC2=CC3=CC(=C(C=C3C=C2C=C1C1=CC2=CC=CC=C2C=C1)C1=CC2=CC=CC=C2C=C1)C1=CC2=CC=CC=C2C=C1', iupac => '2,3,6,7-tetra(naphthalen-2-yl)anthracene' }, # From BBv3 P-61.2.3 { smiles => 'OC1=CC(=CC=2C(C3=CC=CC(=C3C(C12)=O)O)=O)C', iupac => '1,8-dihydroxy-3-methylanthracene-9,10-dione' }, # From BBv3 P-64.7.1 { smiles => 'CC1=C2C=C3C=CC=CC3=CC2=CC2=CC3=CC=CC=C3C=C12', iupac => '6-methylpentacene' }, # From Wikipedia Pentacene ); @cases = grep { !exists $_->{AUTHOR} } @cases unless $ENV{AUTHOR_TESTING}; plan skip_all => 'No available cases' unless @cases; plan tests => scalar @cases; for my $case (@cases) { my $ok; eval { $ok = is ChemOnomatopist::get_name( $case->{smiles} ), $case->{iupac}, $case->{smiles} }; $@ =~ s/\n$// if $@; fail $case->{smiles} . ": $@" if $@; diag 'test supposed to fail with AUTHOR_TESTING' if $case->{AUTHOR} && $ok; } ChemOnomatopist-0.10.0/t/40_polyaphenes.t000066400000000000000000000022421463750375500202020ustar00rootroot00000000000000#!/usr/bin/perl use strict; use warnings; use ChemOnomatopist; use Test::More; my @cases = ( { smiles => 'OC1=C2C(C=3C=CC=CC3C(C2=C2C(C3=CC=CC=C3C(C2=C1O)=O)=O)=O)=O', iupac => '6,7-dihydroxypentaphene-5,8,13,14-tetrone' }, # PubChem 5379520 { smiles => 'CC1=C2C(C=3C=CC=CC3C(C2=C2C(C3=CC=CC=C3C(C2=C1)=O)=O)=O)=O', iupac => '6-methylpentaphene-5,8,13,14-tetrone' }, # PubChem 93949195 { smiles => 'FC1=C(C2=C(C3=C4C(=C5C(=C6C(=C(C(=C(C6=C(C5=C(C4=C(C(=C3C(=C2C(=C1F)F)F)F)F)F)F)F)F)F)F)F)F)F)C1=CC=CC=C1', iupac => '2,3,4,5,6,7,8,9,10,11,12,13,14,15,16-pentadecafluoro-1-phenylhexaphene', AUTHOR => 'flaky' }, # PubChem 121333830 ); @cases = grep { !exists $_->{AUTHOR} } @cases unless $ENV{AUTHOR_TESTING}; plan skip_all => 'No available cases' unless @cases; plan tests => scalar @cases; for my $case (@cases) { my $ok; eval { $ok = is ChemOnomatopist::get_name( $case->{smiles} ), $case->{iupac}, $case->{smiles} }; $@ =~ s/\n$// if $@; fail $case->{smiles} . ": $@" if $@; if( $case->{AUTHOR} && $ok ) { diag 'test supposed to fail with AUTHOR_TESTING' . ( $case->{AUTHOR} !~ /^1$/ ? ': ' . $case->{AUTHOR} : '' ); } } ChemOnomatopist-0.10.0/t/41_isotopes.t000066400000000000000000000106441463750375500175260ustar00rootroot00000000000000#!/usr/bin/perl use strict; use warnings; use ChemOnomatopist; use Test::More; my @cases = ( # From BBv3 P-44.4.1.11.1 { smiles => 'C(CCCC)[2H]', iupac => '(1-2H1)pentane' }, { smiles => 'C1(CCCCC1)([2H])[2H]', iupac => '(1,1-2H2)cyclohexane' }, { smiles => '[14CH2]1CCCCC1', iupac => '(14C1)cyclohexane', AUTHOR => 1 }, # From BBv3 P-44.4.1.11.2 { smiles => '[14CH2]1CCCC1', iupac => '(14C1)cyclopentane', AUTHOR => 1 }, { smiles => 'C1(CCCC1)[2H]', iupac => '(2H1)cyclopentane' }, # From BBv3 P-44.4.1.11.3 { smiles => 'C1(=CC=CC=C1)[3H]', iupac => '(3H1)benzene', AUTHOR => 1 }, { smiles => 'C1(=CC=CC=C1)[2H]', iupac => '(2H1)benzene', AUTHOR => 1 }, # From BBv3 P-44.4.1.11.4 { smiles => 'N1=C(C=CC=C1)[2H]', iupac => '(2-2H)pyridine' }, { smiles => 'N1=CC(=CC=C1)[2H]', iupac => '(3-2H)pyridine' }, # From BBv3 P-82.2.1 { smiles => '[14CH4]', iupac => '(14C)methane' }, { smiles => 'Cl[12CH](Cl)Cl', iupac => 'trichloro(12C)methane' }, { smiles => 'C[2H]', iupac => '(2H1)methane' }, { smiles => 'ClC([2H])([2H])Cl', iupac => 'dichloro(2H2)methane' }, { smiles => 'C(OC1=CC=CC=C1)([2H])([2H])[2H]', iupac => '(2H3)methoxybenzene', AUTHOR => 1 }, { smiles => 'C1(=CC=CC=C1)[13C]([13CH3])=O', iupac => '1-phenyl(1,2-13C2)ethan-1-one' }, { smiles => '[13CH3]C1=C(C=CC=C1)[13CH3]', iupac => '1,2-di[(13C)methyl]benzene' }, { smiles => '[13CH3]C1=[13CH]C=CC=C1', iupac => '1-(13C)methyl(2-13C)benzene', AUTHOR => 1 }, { smiles => '[2H]CCO', iupac => '(2-2H1)ethan-1-ol' }, { smiles => '[12CH3]CO', iupac => '(2-12C)ethan-1-ol' }, { smiles => 'N[14CH2]C1(CCCC1)O', iupac => '1-[amino(14C)methyl]cyclopentan-1-ol' }, { smiles => 'NCC1(CCCC1)[18OH]', iupac => '1-(aminomethyl)cyclopentan-1-(18O)ol' }, { smiles => '[131I]C1=CC=C2C=3C=CC(=CC3CC2=C1)NC(C)=O', iupac => 'N-[7-(131I)iodo-9H-fluoren-2-yl]acetamide', AUTHOR => 1 }, { smiles => 'C(C)OC([14CH2][14CH2]C(=O)[O-])=O.[Na+]', iupac => 'sodium 4-ethoxy-4-oxo(2,3-14C2)butanoate', AUTHOR => 1 }, { smiles => 'S1C([14CH2]CC1)C1=CC=NC=C1', iupac => '4-[(3-14C)thiolan-2-yl]pyridine' }, { smiles => '[35Cl]C(C[2H])C(CC)C([2H])([2H])[2H]', iupac => '2-(35Cl)chloro-3-(2H3)methyl(1-2H1)pentane' }, # From BBv2 P-82.2.2.1 { smiles => '[13CH3]C1=NC=CC=C1C', iupac => '2-(13C)methyl-3-methylpyridine' }, { smiles => 'C(C([2H])[2H])C(CO)C(CCC)CC', iupac => '2-(2,2-2H2)ethyl-3-ethylhexan-1-ol' }, # From BBv3 P-82.5.1 { smiles => 'FC(C[2H])(F)F', iupac => '1,1,1-trifluoro(2-2H1)ethane', AUTHOR => 1 }, { smiles => 'ClC1=C(C(=CC=C1)F)[2H]', iupac => '1-chloro-3-fluoro(2-2H)benzene', AUTHOR => 1 }, { smiles => 'COC1=C(C(=C(C(=C1[3H])[3H])[3H])[3H])O', iupac => '2-methoxy(3,4,5,6-3H4)phenol', AUTHOR => 1 }, # From BBv3 P-82.5.2 { smiles => 'C[14CH2]CC', iupac => '(2-14C)butane' }, { smiles => 'CC([14CH2]C)([2H])[2H]', iupac => '(3-14C,2,2-2H2)butane' }, { smiles => 'C[14CH2]C(C)[2H]', iupac => '(2-14C,3-2H1)butane' }, { smiles => 'C1(=CC(=CC=C1)[3H])O', iupac => '(3-3H)phenol' }, { smiles => 'C([C@@H](C)O)[2H]', iupac => '(2R)-(1-2H1)propan-2-ol', AUTHOR => 1 }, { smiles => 'C[C@@H](C[C@@H](C)[2H])[3H]', iupac => '(2S,4R)-(4-2H1,2-3H1)pentane', AUTHOR => 1 }, # From BBv3 P-82.6.3.3 { smiles => 'C1=C(C=CC2=CC=CC=C12)[15N]=NC1=CC=CC=C1', iupac => '1-(naphthalen-2-yl)-2-phenyl(1-15N)diazene' }, { smiles => 'C(CC)=[15N]N', iupac => '1-propylidene(1-15N)hydrazine' }, { smiles => 'C(C)S[34S]SCCC(=O)O', iupac => '3-[ethyl(2-34S)trisulfanyl]propanoic acid', AUTHOR => 1 }, { smiles => 'ClC1=C(C=CC2=CC=CC=C12)[15N]=[N+](C1=CC=CC=C1)[O-]', iupac => '1-(1-chloronaphthalen-2-yl)-2-phenyl(1-15N)diazene 2-oxide', AUTHOR => 1 }, { smiles => '[14C](CCCC)(=O)[O][3H]', iupac => '(1-14C)pentan(3H)oic acid', AUTHOR => 1 }, # From BBv3 P-82.2.4 { smiles => 'CCO[2H]', iupac => 'ethan(2H)ol' }, # From BBv3 P-82.6.1.1 ); @cases = grep { !exists $_->{AUTHOR} } @cases unless $ENV{AUTHOR_TESTING}; plan skip_all => 'No available cases' unless @cases; plan tests => scalar @cases; for my $case (@cases) { my $ok; eval { $ok = is ChemOnomatopist::get_name( $case->{smiles} ), $case->{iupac}, $case->{smiles} }; $@ =~ s/\n$// if $@; fail $case->{smiles} . ": $@" if $@; if( $case->{AUTHOR} && $ok ) { diag 'test supposed to fail with AUTHOR_TESTING' . ( $case->{AUTHOR} !~ /^1$/ ? ': ' . $case->{AUTHOR} : '' ); } } ChemOnomatopist-0.10.0/t/42_phenanthrenes.t000066400000000000000000000022541463750375500205220ustar00rootroot00000000000000#!/usr/bin/perl use strict; use warnings; use ChemOnomatopist; use Test::More; my @cases = ( { smiles => 'C1=CC=CC=2C3=CC=CC=C3C=CC12', iupac => 'phenanthrene' }, # From BBv2 P-25.2.1 { smiles => 'N1=CC=CC2=CC=C3N=CC=CC3=C12', iupac => '1,7-phenanthroline' }, { smiles => 'C1=CC=CC2=NC=C3C=CC=CC3=C12', iupac => 'phenanthridine' }, { smiles => 'COC=1C(=CC=2C=C(C3=CC(=CC=C3C2C1OC)O)OC)O', iupac => '3,4,9-trimethoxyphenanthrene-2,7-diol' }, # From Wikipedia Gymnopusin { smiles => 'C1=CC=CC=2C3=CC=CC=C3C(C(C12)=O)=O', iupac => 'phenanthrene-9,10-dione' }, # From Wikipedia Phenanthrenequinone { smiles => 'C1=CC=NC=2C(C(C3=NC=CC=C3C12)=O)=O', iupac => '4,7-phenanthroline-5,6-dione' }, # From Wikipedia Phanquinone ); @cases = grep { !exists $_->{AUTHOR} } @cases unless $ENV{AUTHOR_TESTING}; plan skip_all => 'No available cases' unless @cases; plan tests => scalar @cases; for my $case (@cases) { my $ok; eval { $ok = is ChemOnomatopist::get_name( $case->{smiles} ), $case->{iupac}, $case->{smiles} }; $@ =~ s/\n$// if $@; fail $case->{smiles} . ": $@" if $@; diag 'test supposed to fail with AUTHOR_TESTING' if $case->{AUTHOR} && $ok; } ChemOnomatopist-0.10.0/t/43_porphyrins.t000066400000000000000000000012001463750375500200640ustar00rootroot00000000000000#!/usr/bin/perl use strict; use warnings; use ChemOnomatopist; use Test::More; my @cases = ( { smiles => 'C12=CC=C(N1)C=C1C=CC(=N1)C=C1C=CC(N1)=CC=1C=CC(N1)=C2', iupac => 'porphyrin' }, ); @cases = grep { !exists $_->{AUTHOR} } @cases unless $ENV{AUTHOR_TESTING}; plan skip_all => 'No available cases' unless @cases; plan tests => scalar @cases; for my $case (@cases) { my $ok; eval { $ok = is ChemOnomatopist::get_name( $case->{smiles} ), $case->{iupac}, $case->{smiles} }; $@ =~ s/\n$// if $@; fail $case->{smiles} . ": $@" if $@; diag 'test supposed to fail with AUTHOR_TESTING' if $case->{AUTHOR} && $ok; } ChemOnomatopist-0.10.0/t/44_urea.t000066400000000000000000000041151463750375500166140ustar00rootroot00000000000000#!/usr/bin/perl use strict; use warnings; use ChemOnomatopist; use Test::More; my @cases = ( { smiles => 'CNC(=O)N', iupac => 'methylurea' }, # From BBv3 P-14.3.4.3 { smiles => 'FN(C(N(F)F)=O)F', iupac => 'tetrafluorourea' }, # From BBv3 P-14.3.4.5 { smiles => 'NC(=S)N', iupac => 'thiourea' }, # From BBv3 P-15.5.3.4.3 { smiles => 'NC(=O)N', iupac => 'urea' }, # From BBv3 P-34.1.1.5 { smiles => 'CN(C(=O)N)N=O', iupac => 'N-methyl-N-nitrosourea', AUTHOR => 1 }, # From BBv3 P-61.5.2 # FIXME: detected as hydrazine # From BBv3 P-66.1.6.1.1.2 { smiles => 'CNC(=O)NC', iupac => 'N,N\'-dimethylurea' }, { smiles => 'CC(C)=NC(=O)N', iupac => 'N-(propan-2-ylidene)urea' }, { smiles => 'C(#N)C(CCSC)NC(=O)NC', iupac => 'N-[1-cyano-3-(methylsulfanyl)propyl]-N\'-methylurea', AUTHOR => 1 }, # From BBv2 { smiles => 'CC(CC)NC(=[Se])N', iupac => 'N-(butan-2-yl)selenourea' }, # From BBv3 P-66.1.6.1.3.1 { smiles => 'C(N)(=N)NC(=O)N', iupac => 'N-carbamimidoylurea', AUTHOR => 1 }, # From BBv3 P-66.1.6.1.1.3 { smiles => 'CNC(=O)NC1=C(C2=CC=CC=C2C=C1)C(=O)O', iupac => '2-[(methylcarbamoyl)amino]naphthalene-1-carboxylic acid', AUTHOR => 1 }, { smiles => 'C(=O)(NC1=CC=C2C=CC(=CC2=C1)S(=O)(=O)O)NC1=CC=C2C=CC(=CC2=C1)S(=O)(=O)O', iupac => '7,7′-[carbonylbis(azanediyl)]di(naphthalene-2-sulfonic acid)', AUTHOR => 1 }, { smiles => 'C(N)(=O)NC(C1=CC=CC=C1)=O', iupac => 'N-carbamoylbenzamide', AUTHOR => 1 }, { smiles => 'C(N)(=O)NS(=O)(=O)C1=CC=CC=C1', iupac => 'N-carbamoylbenzenesulfonamide', AUTHOR => 1 }, { smiles => 'C(N)(=O)NC(CC1=CC=CC=C1)=O', iupac => 'N-carbamoyl-2-phenylacetamide', AUTHOR => 1 }, ); @cases = grep { !exists $_->{AUTHOR} } @cases unless $ENV{AUTHOR_TESTING}; plan skip_all => 'No available cases' unless @cases; plan tests => scalar @cases; for my $case (@cases) { my $ok; eval { $ok = is ChemOnomatopist::get_name( $case->{smiles} ), $case->{iupac}, $case->{smiles} }; $@ =~ s/\n$// if $@; fail $case->{smiles} . ": $@" if $@; diag 'test supposed to fail with AUTHOR_TESTING' if $case->{AUTHOR} && $ok; } ChemOnomatopist-0.10.0/t/45_nonstandard_valences.t000066400000000000000000000024501463750375500220540ustar00rootroot00000000000000#!/usr/bin/perl use strict; use warnings; use ChemOnomatopist; use Test::More; my @cases = ( { smiles => 'C[SH5]', iupac => 'methyl-λ6-sulfane' }, # From BBv3 P-14.1.3 # From BBv3 P-15.4.3.2.5 { smiles => 'COCCOCCOCC[SH2]C', iupac => '2,5,8-trioxa-11λ4-thiadodecane' }, { smiles => 'C[SH2]CCSCCSCCSC', iupac => '2λ4,5,8,11-tetrathiadodecane' }, { smiles => 'O1CCOP12OCCCO2', iupac => '1,4,6,10-tetraoxa-5λ5-phosphaspiro[4.5]decane' }, # From BBv3 P-44.4.1.5 # From BBv3 P-24.8.1.1 { smiles => 'C1CCC12CC[PH3]CC2', iupac => '7λ5-phosphaspiro[3.5]nonane' }, { smiles => 'C1[SiH2]CC12CC[PH3]CC2', iupac => '7λ5-phospha-2-silaspiro[3.5]nonane' }, { smiles => 'C1CCS12CCCCC2', iupac => '4λ4-thiaspiro[3.5]nonane' }, { smiles => 'n1(nccc1c1ccc(F)cc1)c1ccccc1', iupac => '5-(4-fluorophenyl)-1-phenylpyrazole' }, # COD 2201522 ); @cases = grep { !exists $_->{AUTHOR} } @cases unless $ENV{AUTHOR_TESTING}; plan skip_all => 'No available cases' unless @cases; plan tests => scalar @cases; for my $case (@cases) { my $ok; eval { $ok = is ChemOnomatopist::get_name( $case->{smiles} ), $case->{iupac}, $case->{smiles} }; $@ =~ s/\n$// if $@; fail $case->{smiles} . ": $@" if $@; diag 'test supposed to fail with AUTHOR_TESTING' if $case->{AUTHOR} && $ok; } ChemOnomatopist-0.10.0/t/46_polyhelicenes.t000066400000000000000000000015651463750375500205330ustar00rootroot00000000000000#!/usr/bin/perl use strict; use warnings; use ChemOnomatopist; use Test::More; my @cases = ( # BBv2 P-25.1.2.6 { smiles => 'C1=CC=CC2=CC=C3C=CC4=CC=C5C=CC6=CC=CC=C6C5=C4C3=C12', iupac => 'hexahelicene' }, { smiles => 'C1=CC=CC2=CC=C3C=CC4=CC=C5C=CC6=CC=C7C=CC=CC7=C6C5=C4C3=C12', iupac => 'heptahelicene' }, { smiles => 'C1=CC=CC2=CC=C3C=CC4=CC=C5C=CC6=CC=C7C=CC8=CC=CC=C8C7=C6C5=C4C3=C12', iupac => 'octahelicene' }, ); @cases = grep { !exists $_->{AUTHOR} } @cases unless $ENV{AUTHOR_TESTING}; plan skip_all => 'No available cases' unless @cases; plan tests => scalar @cases; for my $case (@cases) { my $ok; eval { $ok = is ChemOnomatopist::get_name( $case->{smiles} ), $case->{iupac}, $case->{smiles} }; $@ =~ s/\n$// if $@; fail $case->{smiles} . ": $@" if $@; diag 'test supposed to fail with AUTHOR_TESTING' if $case->{AUTHOR} && $ok; } ChemOnomatopist-0.10.0/t/47_diazenes.t000066400000000000000000000056331463750375500174730ustar00rootroot00000000000000#!/usr/bin/perl use strict; use warnings; use ChemOnomatopist; use Test::More; my @cases = ( # From BBv3 P-68.3.1.3.1 { smiles => 'N(=NC=O)C=O', iupac => 'diazenedicarbaldehyde', AUTHOR => 1 }, { smiles => 'C1(=CC=CC=C1)N=NC#N', iupac => 'phenyldiazenecarbonitrile' }, { smiles => 'CN=NCC(=O)O', iupac => '(methyldiazenyl)acetic acid' }, { smiles => 'N(=N)CCC(=O)O', iupac => '3-diazenylpropanoic acid', AUTHOR => 1 }, { smiles => 'N(=N)C=1C=C(C(=O)O)C=CC1N=N', iupac => '3,4-bis(diazenyl)benzoic acid' }, # From BBv3 P-68.3.1.3.2.1 { smiles => 'CN=NC', iupac => 'dimethyldiazene' }, { smiles => 'C1(=CC=CC=C1)N=NC1=CC=CC=C1', iupac => 'diphenyldiazene' }, { smiles => 'ClC=1C=C(C=CC1)N=NC1=CC=C(C=C1)Cl', iupac => '(3-chlorophenyl)(4-chlorophenyl)diazene' }, { smiles => 'C1(=CC=CC2=CC=CC=C12)N=NC1=CC2=CC=CC=C2C=C1', iupac => '(naphthalen-1-yl)(naphthalen-2-yl)diazene' }, # From BBv3 P-68.3.1.3.2.2 { smiles => 'C(=C)N=NC', iupac => 'ethenyl(methyl)diazene' }, { smiles => 'C1=C(C=CC2=CC=CC=C12)N=NC1=CC=CC=C1', iupac => '(naphthalen-2-yl)(phenyl)diazene' }, { smiles => 'C1(=CC=CC=C1)N=NC1=CC=C(C=C1)S(=O)(=O)O', iupac => '4-(phenyldiazenyl)benzene-1-sulfonic acid' }, { smiles => 'ClC1=CC(=C(C=C1)N=NC1=C(C=CC2=CC=CC=C12)N)C', iupac => '1-[(4-chloro-2-methylphenyl)diazenyl]naphthalen-2-amine' }, { smiles => 'OC1=C(C2=CC=CC=C2C=C1)N=NC1=CC=C(C=C1)S(=O)(=O)O', iupac => '4-[(2-hydroxynaphthalen-1-yl)diazenyl]benzene-1-sulfonic acid' }, { smiles => 'N(=NC1=CC=C(C(=O)O)C=C1)C1=CC=C(C(=O)O)C=C1', iupac => '4,4\'-diazenediyldibenzoic acid', AUTHOR => 1 }, # From BBv3 P-68.3.1.3.2.3 { smiles => 'C1=C(C=CC2=CC3=CC=CC=C3C=C12)N=NC1=CC=C2C=CC(=CC2=C1)N=NC1=CC=CC=C1', iupac => '{7-[(anthracen-2-yl)diazenyl]naphthalen-2-yl}(phenyl)diazene', AUTHOR => 1 }, { smiles => 'C1(=CC=CC=C1)N=NC1=C(C2=C(C(=CC=C2C=C1)N=NC1=CC=CC=C1)O)O', iupac => '2,7-bis(phenyldiazenyl)naphthalene-1,8-diol' }, # From BBv3 P-68.3.1.3.4 { smiles => 'N(=N)C(=O)NN', iupac => 'diazenecarbohydrazide', AUTHOR => 1 }, { smiles => 'N(=N)C(NN)=S', iupac => 'diazenecarbothiohydrazide', AUTHOR => 1 }, { smiles => 'C1(=CC=CC=C1)NNC(=O)N=NC1=CC=CC=C1', iupac => 'N\',2-diphenyldiazenecarbohydrazide', AUTHOR => 1 }, { smiles => 'C1(=CC=CC=C1)NNC(=S)N=NC1=CC=CC=C1', iupac => 'N\',2-diphenyldiazenecarbothiohydrazide', AUTHOR => 1 }, { smiles => 'N(=N)C(=O)NNCCC(=O)OCC', iupac => 'ethyl 3-(diazenecarbohydrazido)propanoate', AUTHOR => 1 }, ); @cases = grep { !exists $_->{AUTHOR} } @cases unless $ENV{AUTHOR_TESTING}; plan skip_all => 'No available cases' unless @cases; plan tests => scalar @cases; for my $case (@cases) { my $ok; eval { $ok = is ChemOnomatopist::get_name( $case->{smiles} ), $case->{iupac}, $case->{smiles} }; $@ =~ s/\n$// if $@; fail $case->{smiles} . ": $@" if $@; diag 'test supposed to fail with AUTHOR_TESTING' if $case->{AUTHOR} && $ok; } ChemOnomatopist-0.10.0/t/48_hydrazides.t000066400000000000000000000035011463750375500200300ustar00rootroot00000000000000#!/usr/bin/perl use strict; use warnings; use ChemOnomatopist; use Test::More; my @cases = ( # From BBv2 P-66.3.1.1 { smiles => 'C(CCCC)(=O)NN', iupac => 'pentanehydrazide' }, { smiles => 'C(CCC(=O)NN)(=O)NN', iupac => 'butanedihydrazide', AUTHOR => 1 }, { smiles => 'C1(CCCCC1)C(=O)NN', iupac => 'cyclohexanecarbohydrazide', AUTHOR => 1 }, { smiles => 'N1(CCCCC1)C(=O)NN', iupac => 'piperidine-1-carbohydrazide', AUTHOR => 1 }, { smiles => 'CS(=O)(=O)NN', iupac => 'methanesulfonohydrazide', AUTHOR => 1 }, { smiles => 'C(C)(C(=O)NN)(C(=O)NN)C(=O)NN', iupac => 'ethane-1,1,1-tricarbohydrazide', AUTHOR => 1 }, # From BBv2 P-66.3.1.2.1 { smiles => 'C(#N)NN', iupac => 'cyanohydrazide', AUTHOR => 1 }, { smiles => 'C(=O)NN', iupac => 'formohydrazide', AUTHOR => 1 }, { smiles => 'C(C)(=O)NN', iupac => 'acetohydrazide', AUTHOR => 1 }, { smiles => 'C(C1=CC=CC=C1)(=O)NN', iupac => 'benzohydrazide', AUTHOR => 1 }, { smiles => 'C(C(=O)NN)(=O)NN', iupac => 'oxalohydrazide', AUTHOR => 1 }, { smiles => 'C(C)(NN)=N', iupac => 'ethanimidohydrazide', AUTHOR => 1 }, # From BBv3 P-66.3.4 { smiles => 'C(CC)(NN)=S', iupac => 'propanethiohydrazide' }, { smiles => 'C1(=CC=CC=C1)C(NN)=S', iupac => 'benzenecarbothiohydrazide', AUTHOR => 1 }, { smiles => 'N(N)C(C(=O)NN)=S', iupac => '2-hydrazinyl-2-sulfanylideneacetohydrazide', AUTHOR => 1 }, ); @cases = grep { !exists $_->{AUTHOR} } @cases unless $ENV{AUTHOR_TESTING}; plan skip_all => 'No available cases' unless @cases; plan tests => scalar @cases; for my $case (@cases) { my $ok; eval { $ok = is ChemOnomatopist::get_name( $case->{smiles} ), $case->{iupac}, $case->{smiles} }; $@ =~ s/\n$// if $@; fail $case->{smiles} . ": $@" if $@; diag 'test supposed to fail with AUTHOR_TESTING' if $case->{AUTHOR} && $ok; } ChemOnomatopist-0.10.0/t/49_amidines.t000066400000000000000000000042731463750375500174630ustar00rootroot00000000000000#!/usr/bin/perl use strict; use warnings; use ChemOnomatopist; use Test::More; my @cases = ( # From BBv3 P-66.4.1.1 { smiles => 'C(CCCCC)(N)=N', iupac => 'hexanimidamide' }, { smiles => 'C1(CCCCC1)C(N)=N', iupac => 'cyclohexanecarboximidamide' }, { smiles => 'C(C)(N)=N', iupac => 'ethanimidamide' }, { smiles => 'CS(N)=N', iupac => 'methanesulfinimidamide' }, { smiles => 'C(N)=N', iupac => 'methanimidamide' }, { smiles => 'C(CCCC(N)=N)(N)=N', iupac => 'pentanediimidamide', AUTHOR => 1 }, { smiles => '[SiH2]([SiH2]C(N)=N)C(N)=N', iupac => 'disilane-1,2-dicarboximidamide', AUTHOR => 1 }, { smiles => 'C(CCC(N)=N)(N)=N', iupac => 'butanediimidamide', AUTHOR => 1 }, { smiles => 'C(C(N)=N)(N)=N', iupac => 'ethanediimidamide', AUTHOR => 1 }, { smiles => 'C=1(C(=CC=CC1)C(N)=N)C(N)=N', iupac => 'benzene-1,2-dicarboximidamide', AUTHOR => 1 }, { smiles => 'C1(=CC=C(C=C1)C(N)=N)C(N)=N', iupac => 'benzene-1,4-dicarboximidamide', AUTHOR => 1 }, { smiles => 'C(C)NC(=N)C1(CCCCC1)C(N(C)C)=N', iupac => 'N\'\'1-ethyl-N1,N1-dimethylcyclohexane-1,1-dicarboximidamide', AUTHOR => 1 }, { smiles => 'NS(=N)CCC(=O)O', iupac => '3-(S-aminosulfinimidoyl)propanoic acid' }, # From BBv3 P-66.4.1.3.4 # From BBv3 P-66.4.1.3.5 { smiles => 'C(C)(NC1=CC=C(C(=O)O)C=C1)=N', iupac => '4-ethanimidamidobenzoic acid', AUTHOR => 1 }, { smiles => 'C(C)S(NC1=C(C(=O)O)C=CC=C1)(=N)=N', iupac => '2-(ethanesulfonodiimidamido)benzoic acid', AUTHOR => 1 }, { smiles => '[NH]=C(N)c1ccccc1', iupac => 'benzenecarboximidamide' }, # From Wikipedia Benzamidine { smiles => 'CC1=CC(=CC=C1)CC(=NCC(C)OC2=CC=CC(=C2)OC)N', iupac => 'N\'-[2-(3-methoxyphenoxy)propyl]-2-(3-methylphenyl)ethanimidamide', AUTHOR => 1 }, # From Wikipedia Xylamidine ); @cases = grep { !exists $_->{AUTHOR} } @cases unless $ENV{AUTHOR_TESTING}; plan skip_all => 'No available cases' unless @cases; plan tests => scalar @cases; for my $case (@cases) { my $ok; eval { $ok = is ChemOnomatopist::get_name( $case->{smiles} ), $case->{iupac}, $case->{smiles} }; $@ =~ s/\n$// if $@; fail $case->{smiles} . ": $@" if $@; diag 'test supposed to fail with AUTHOR_TESTING' if $case->{AUTHOR} && $ok; } ChemOnomatopist-0.10.0/t/50_anions.t000066400000000000000000000034651463750375500171530ustar00rootroot00000000000000#!/usr/bin/perl use strict; use warnings; use ChemOnomatopist; use Test::More; my @cases = ( # From BBv3 P-72.2.1 { smiles => 'C1(=CC=CC=C1)[C-2]C1=CC=CC=C1', iupac => 'diphenylmethanediide' }, # From BBv3 P-72.2.2.1 { smiles => '[C-]1=CC=CC=C1', iupac => 'benzenide' }, # From BBv3 P-72.2.2.2.1.1 { smiles => 'C(C)(=O)[O-]', iupac => 'acetate' }, { smiles => 'C(CC)(=O)O[O-]', iupac => 'propaneperoxoate', AUTHOR => 1 }, { smiles => 'C(C)(O[O-])=S', iupac => 'ethaneperoxothioate', AUTHOR => 1 }, { smiles => 'CC(=O)O[S-]', iupac => 'ethane(OS-thioperoxoate)', AUTHOR => 1 }, { smiles => 'C(CC)([O-])=S', iupac => 'propanethioate' }, { smiles => 'C(C)([O-])=S', iupac => 'ethanethioate' }, { smiles => 'C1(=CC=CC=C1)S(=O)(=O)[O-]', iupac => 'benzenesulfonate', AUTHOR => 1 }, { smiles => 'C(C1=CC=CC=C1)P([O-])CC1=CC=CC=C1', iupac => 'dibenzylphosphinite', AUTHOR => 1 }, { smiles => 'N1=C(C=CC=C1C(=O)[O-])C(=O)[O-]', iupac => 'pyridine-2,6-dicarboxylate' }, { smiles => 'N1C(=CC=C1)C([O-])=N', iupac => '1H-pyrrole-2-carboximidate', AUTHOR => 1 }, # From BBv3 P-72.2.2.2.2 { smiles => 'CO[O-]', iupac => 'methaneperoxolate' }, { smiles => 'C(CO[O-])O[O-]', iupac => 'ethane-1,2-bis(peroxolate)', AUTHOR => 1 }, { smiles => 'C1(=CC=C(C=C1)S[S-])S[S-]', iupac => 'benzene-1,4-bis(dithioperoxolate)', AUTHOR => 1 }, ); @cases = grep { !exists $_->{AUTHOR} } @cases unless $ENV{AUTHOR_TESTING}; plan skip_all => 'No available cases' unless @cases; plan tests => scalar @cases; for my $case (@cases) { my $ok; eval { $ok = is ChemOnomatopist::get_name( $case->{smiles} ), $case->{iupac}, $case->{smiles} }; $@ =~ s/\n$// if $@; fail $case->{smiles} . ": $@" if $@; diag 'test supposed to fail with AUTHOR_TESTING' if $case->{AUTHOR} && $ok; } ChemOnomatopist-0.10.0/t/51_cations.t000066400000000000000000000033231463750375500173160ustar00rootroot00000000000000#!/usr/bin/perl use strict; use warnings; use ChemOnomatopist; use Test::More; my @cases = ( # From BBv3 P-73.2.2.1.1 { smiles => '[CH3+]', iupac => 'methylium' }, { smiles => 'C1(=CC=CC=C1)[Si+](C1=CC=CC=C1)C1=CC=CC=C1', iupac => 'triphenylsilylium', AUTHOR => 1 }, { smiles => '[CH2+]CC', iupac => 'propylium', AUTHOR => 1 }, { smiles => '[CH+]1CCC1', iupac => 'cyclobutylium' }, # From BBv3 P-73.2.2.1.2 { smiles => 'C1(=CC=CC=C1)[S+]', iupac => 'phenylsulfanylium' }, { smiles => 'CNN=[N+]', iupac => '3-methyltriaz-1-en-1-ylium', AUTHOR => 1 }, { smiles => 'C[Si]([Si+]([Si](C)(C)C)C)(C)C', iupac => 'heptamethyltrisilan-2-ylium', AUTHOR => 1 }, { smiles => 'O1[C+]=CC=C1', iupac => 'furan-2-ylium' }, { smiles => 'C1CCCC12CC[CH+]CC2', iupac => 'spiro[4.5]decan-8-ylium', AUTHOR => 1 }, { smiles => '[CH2+]C[CH2+]', iupac => 'propane-1,3-bis(ylium)' }, { smiles => 'CN([N+2])C', iupac => '2,2-dimethylhydrazine-1,1-bis(ylium)' }, { smiles => 'C[C+2]C', iupac => 'propane-2,2-bis(ylium)' }, { smiles => '[CH+]1[CH+]C=C1', iupac => 'cyclobut-3-ene-1,2-bis(ylium)' }, { smiles => '[CH+]1C=CC=C1', iupac => 'cyclopenta-2,4-dien-1-ylium' }, { smiles => 'O=C1[N+]C(CC1)=O', iupac => '2,5-dioxopyrrolidin-1-ylium', AUTHOR => 1 }, ); @cases = grep { !exists $_->{AUTHOR} } @cases unless $ENV{AUTHOR_TESTING}; plan skip_all => 'No available cases' unless @cases; plan tests => scalar @cases; for my $case (@cases) { my $ok; eval { $ok = is ChemOnomatopist::get_name( $case->{smiles} ), $case->{iupac}, $case->{smiles} }; $@ =~ s/\n$// if $@; fail $case->{smiles} . ": $@" if $@; diag 'test supposed to fail with AUTHOR_TESTING' if $case->{AUTHOR} && $ok; } ChemOnomatopist-0.10.0/t/52_seniority.t000066400000000000000000000060261463750375500177070ustar00rootroot00000000000000#!/usr/bin/perl use strict; use warnings; use ChemOnomatopist; use Test::More; my @cases = ( # From BBv3 P-44.1.1 { smiles => 'C1(CCCCC1)CCC(=O)O', iupac => '3-cyclohexylpropanoic acid' }, { smiles => 'C(CC)C=1C=C(C(=O)O)C=CC1', iupac => '3-propylbenzoic acid' }, { smiles => 'ClCCCCC(CCO)C(C)O', iupac => '3-(4-chlorobutyl)pentane-1,4-diol' }, { smiles => 'N(N)C(=O)O', iupac => 'hydrazinecarboxylic acid', AUTHOR => 1 }, { smiles => '[SiH3]CCC(=O)O', iupac => '3-silylpropanoic acid' }, { smiles => 'C(C)[SiH2]C(=O)O', iupac => 'ethylsilanecarboxylic acid', AUTHOR => 1 }, { smiles => 'C(CCC)OCCOCC(SCCSCC(=O)O)(CCSCCSCC(=O)O)COCCOCCCC', iupac => '7,7-bis[(2-butoxyethoxy)methyl]-3,6,10,13-tetrathiapentadecane-1,15-dioic acid', AUTHOR => 1 }, { smiles => '[SiH2](CC[SiH3])CC[SiH3]', iupac => '[silanediyldi(ethane-2,1-diyl)]bis(silane)', AUTHOR => 1 }, # From BBv3 P-44.1.2.1 { smiles => 'C[Si](C)(C)C', iupac => 'tetramethylsilane', AUTHOR => 1 }, { smiles => 'CP[SiH3]', iupac => 'methyl(silyl)phosphane', AUTHOR => 1 }, { smiles => 'C(C)(C)(C)[Si](OCC1OC1)(C)C', iupac => 'tert-butyldi(methyl)(oxiranylmethoxy)silane', AUTHOR => 1 }, { smiles => 'C(=O)(O)CC[SiH2][SiH2]C(=O)O', iupac => '2-(2-carboxyethyl)disilane-1-carboxylic acid', AUTHOR => 1 }, { smiles => 'O1C(=CC2=C1C=CC=C2)P', iupac => '(1-benzofuran-2-yl)phosphane' }, { smiles => 'C[Si](N1C=NC=C1)(C)C', iupac => '1-(trimethylsilyl)-1H-imidazole', AUTHOR => 1 }, { smiles => 'C(#N)C1=PC=CC(=C1)C1CC(OCC1)C#N', iupac => '4-(2-cyanophosphinin-4-yl)oxane-2-carbonitrile' }, { smiles => 'P1=C(C=CC=C1)PC=1OC=CC1', iupac => '2-[(phosphinin-2-yl)phosphanyl]furan', AUTHOR => 1 }, { smiles => 'O1CC(=CC=C1)NNC1[SiH2]CCC1', iupac => '1-(2H-pyran-3-yl)-2-(silolan-2-yl)hydrazine' }, { smiles => 'C(SCSCSCSC)NCOCOCOCOC', iupac => 'N-(2,4,6,8-tetrathianonan-1-yl)-2,4,6,8-tetraoxanonan-1-amine', AUTHOR => 1 }, { smiles => 'C([SiH2]C[SiH2]C[SiH2]C[SiH2]C)C1CC(COC1)COCOCOCOC', iupac => '1-[5-(2,4,6,8-tetrasilanonan-1-yl)oxan-3-yl]-2,4,6,8-tetraoxanonane', AUTHOR => 1 }, # From BBv3 P-44.1.2.2 { smiles => 'C(CCCCCC)C1=CC=CC=C1', iupac => 'heptylbenzene' }, { smiles => 'C(=C)C1CCCCC1', iupac => 'ethenylcyclohexane' }, { smiles => 'C(C1=CC=CC=C1)C1=CC=CC=C1', iupac => '1,1\'-methylenedibenzene', AUTHOR => 1 }, { smiles => 'C(=CC1CCCCC1)C1CCCCC1', iupac => '1,1\'-(ethene-1,2-diyl)dicyclohexane', AUTHOR => 1 }, { smiles => 'N(N)C1=NC=CC=C1', iupac => '2-hydrazinylpyridine', AUTHOR => 1 }, { smiles => 'N(N)C=1NCCN1', iupac => '2-hydrazinyl-4,5-dihydro-1H-imidazole' }, ); @cases = grep { !exists $_->{AUTHOR} } @cases unless $ENV{AUTHOR_TESTING}; plan skip_all => 'No available cases' unless @cases; plan tests => scalar @cases; for my $case (@cases) { my $ok; eval { $ok = is ChemOnomatopist::get_name( $case->{smiles} ), $case->{iupac}, $case->{smiles} }; $@ =~ s/\n$// if $@; fail $case->{smiles} . ": $@" if $@; diag 'test supposed to fail with AUTHOR_TESTING' if $case->{AUTHOR} && $ok; } ChemOnomatopist-0.10.0/t/53_bridged_bicycles.t000066400000000000000000000064151463750375500211420ustar00rootroot00000000000000#!/usr/bin/perl use strict; use warnings; use ChemOnomatopist; use Test::More; my @cases = ( # From BBv3 P-12.1 { smiles => 'C12CCC(CC1)C2', iupac => 'bicyclo[2.2.1]heptane' }, { smiles => 'CC1(C2CCC1CC2)C', iupac => '7,7-dimethylbicyclo[2.2.1]heptane' }, # From BBv3 P-23.3.1 { smiles => 'C12COCC(CC1)C2', iupac => '3-oxabicyclo[3.2.1]octane' }, { smiles => 'C12[Se]CC(CC1)C2', iupac => '2-selenabicyclo[2.2.1]heptane' }, { smiles => 'C12COCC(OC1)CO2', iupac => '3,6,8-trioxabicyclo[3.2.2]nonane' }, # From BBv3 P-23.3.2.1 { smiles => 'C12OCSC(CC1)C2', iupac => '2-oxa-4-thiabicyclo[3.2.1]octane' }, # From BBv3 P-23.3.2.2 { smiles => 'C12C[SH2]CC(CC1)C2', iupac => '3λ4-thiabicyclo[3.2.1]octane' }, # From BBv3 P-23.6.1 { smiles => 'C12[AsH3][AsH]C(CC1)C2', iupac => '2λ5,3-diarsabicyclo[2.2.1]heptane', AUTHOR => 'flaky' }, # From BBv3 P-23.6.2 # From BBv3 P-23.7 { smiles => 'C12CC3CC(CC(C1)C3)C2', iupac => 'adamantane', AUTHOR => 1 }, { smiles => 'C12C3C4C5C3C1C5C24', iupac => 'cubane', AUTHOR => 1 }, { smiles => 'N12CCC(CC1)CC2', iupac => '1-azabicyclo[2.2.2]octane' }, { smiles => 'C12C3C4C2C4C31', iupac => 'tetracyclo[2.2.0.02,6.03,5]hexane', AUTHOR => 1 }, # From BBv3 P-31.1.4.1 { smiles => 'C12C=CCC(CC1)C2', iupac => 'bicyclo[3.2.1]oct-2-ene' }, { smiles => 'C12C=CC(C=C1)CC2', iupac => 'bicyclo[2.2.2]octa-2,5-diene', AUTHOR => 1 }, # From BBv3 P-31.1.4.2 { smiles => 'C12CCCCC2=CC1', iupac => 'bicyclo[4.2.0]oct-6-ene', AUTHOR => 1 }, { smiles => 'C12CCCCCCC(=CCCCC1)C2', iupac => 'bicyclo[6.5.1]tetradec-8-ene' }, { smiles => 'C12=CC=CC=C2C1', iupac => 'bicyclo[4.1.0]hepta-1,3,5-triene', AUTHOR => 1 }, { smiles => 'C12CC3=CCCC(CC(CC=4CCCC(C1)C4)C2)C3', iupac => 'tetracyclo[7.7.1.13,7.111,15]nonadeca-3,11(18)-diene', AUTHOR => 1 }, # From BBv3 P-31.1.4.3 { smiles => 'C12C#CCCCCCCCC=CC=CCC(CC=C1)C2', iupac => 'bicyclo[14.3.1]icosa-11,13,18-trien-2-yne' }, { smiles => 'C12C=CCCCCCCCC#CC(CCC1)C2', iupac => 'bicyclo[11.3.1]heptadec-2-en-11-yne' }, { smiles => 'C12C#CC=CC=CCCC(=CCC1)C2', iupac => 'bicyclo[8.3.1]tetradeca-4,6,10-trien-2-yne' }, # From BBv3 P-31.1.4.4 { smiles => 'C12SCC(C=C1)CC2', iupac => '2-thiabicyclo[2.2.2]oct-5-ene' }, { smiles => 'C12OCC(C=C1)C2', iupac => '2-oxabicyclo[2.2.1]hept-5-ene' }, { smiles => 'C12CNCC(C=C1)CC2', iupac => '3-azabicyclo[3.2.2]non-6-ene' }, # From BBv3 P-44.4.1.5 { smiles => 'C12COCCCCCCC(CC1)O2', iupac => '3,13-dioxabicyclo[8.2.1]tridecane' }, { smiles => 'C12CCOCCCCCC(CC1)O2', iupac => '4,13-dioxabicyclo[8.2.1]tridecane' }, { smiles => 'C12CCCC(CCC1)B2C2C[Sn](CC2)(C)C', iupac => '3-(9-borabicyclo[3.3.1]nonan-9-yl)-1,1-dimethylstannolane' }, # From BBv3 P-68.1.5.2.3 ); @cases = grep { !exists $_->{AUTHOR} } @cases unless $ENV{AUTHOR_TESTING}; plan skip_all => 'No available cases' unless @cases; plan tests => scalar @cases; for my $case (@cases) { my $ok; eval { $ok = is ChemOnomatopist::get_name( $case->{smiles} ), $case->{iupac}, $case->{smiles} }; $@ =~ s/\n$// if $@; fail $case->{smiles} . ": $@" if $@; if( $case->{AUTHOR} && $ok ) { diag 'test supposed to fail with AUTHOR_TESTING' . ( $case->{AUTHOR} !~ /^1$/ ? ': ' . $case->{AUTHOR} : '' ); } } ChemOnomatopist-0.10.0/t/54_Williams_Yerin_2013.t000066400000000000000000000030621463750375500212550ustar00rootroot00000000000000#!/usr/bin/perl use strict; use warnings; use ChemOnomatopist; use Test::More; my @cases = ( { smiles => 'O1C(CCC1)=O', iupac => 'oxolan-2-one' }, # Table 1 { smiles => 'C=C1C=CC(C=C1C1C(C(N2CCCCC12)=O)C(=O)C1CN(CCC1)C)=O', iupac => '1-(6-methylene-3-oxo-cyclohexa-1,4-dienyl)-2-(1-methyl-piperidine-3-carbonyl)-hexahydro-indolizin-3-one', AUTHOR => 1 }, # Figure 1 # Table 3 { smiles => 'CC1=CC=C(C=C1)C1=CC=CC=C1', iupac => '4-methylbiphenyl', AUTHOR => 1 }, { smiles => 'C1CCCC2C3C4CCCCC4C(C12)C3', iupac => 'tetradecahydro-9,10-methanoanthracene', AUTHOR => 1 }, { smiles => 'COCCOCCOCCOC', iupac => '2,5,8,11-tetraoxadodecane' }, { smiles => 'O(CC(=O)O)CC(=O)O', iupac => '2,2\'-oxydiacetic acid', AUTHOR => 1 }, { smiles => 'CC1CC2(CCC1)CCCCC2', iupac => '2-methylspiro[5.5]undecane' }, # Figure 4 # Table 4 { smiles => 'BrCC(CCC)CCl', iupac => '1-bromo-2-(chloromethyl)pentane' }, { smiles => 'ClC1C(C1)OC1C(C1)F', iupac => '1-chloro-2-(2-fluorocyclopropoxy)cyclopropane', AUTHOR => 1 }, ); @cases = grep { !exists $_->{AUTHOR} } @cases unless $ENV{AUTHOR_TESTING}; plan skip_all => 'No available cases' unless @cases; plan tests => scalar @cases; for my $case (@cases) { my $ok; eval { $ok = is ChemOnomatopist::get_name( $case->{smiles} ), $case->{iupac}, $case->{smiles} }; $@ =~ s/\n$// if $@; fail $case->{smiles} . ": $@" if $@; if( $case->{AUTHOR} && $ok ) { diag 'test supposed to fail with AUTHOR_TESTING' . ( $case->{AUTHOR} !~ /^1$/ ? ': ' . $case->{AUTHOR} : '' ); } } ChemOnomatopist-0.10.0/t/55_multiplication.t000066400000000000000000000030741463750375500207220ustar00rootroot00000000000000#!/usr/bin/perl use strict; use warnings; use ChemOnomatopist; use Test::More; my @cases = ( # From BBv3 P-45.1.1 { smiles => 'O(C1=CC(=C(C(=O)O)C=C1)Cl)C1=CC(=C(C(=O)O)C=C1)Cl', iupac => '4,4\'-oxybis(2-chlorobenzoic acid)', AUTHOR => 1 }, { smiles => 'C(=O)(O)C1=CC(=C(OC2=CC(=C(C(=O)O)C=C2)Cl)C=C1)Cl', iupac => '4-(4-carboxy-2-chlorophenoxy)-2-chlorobenzoic acid' }, # From BBv3 P-45.1.2 { smiles => 'C1(=CC=CC=C1)C(SCC1=CC=CC=C1)SCC1=CC=CC=C1', iupac => '1,1\'-[(phenylmethylene)bis(sulfanediylmethylene)]dibenzene', AUTHOR => 1 }, { smiles => 'C(=CC1=CC=C(N)C=C1)(C1=CC=C(N)C=C1)C1=CC=C(N)C=C1', iupac => '4,4\',4\'\'-(ethene-1,1,2-triyl)trianiline', AUTHOR => 1 }, { smiles => 'P(CP(O)(O)=O)(CP(O)(O)=O)CP(O)(O)=O', iupac => '[phosphanetriyltris(methylene)]tris(phosphonic acid)', AUTHOR => 1 }, { smiles => 'C1(=CC=CC=C1)C(C1=CC=CC=C1)SC(OC(C1=CC=CC=C1)(C1=CC=CC=C1)C1=CC=CC=C1)(C1=CC=CC=C1)C1=CC=CC=C1', iupac => '1,1\',1\'\'-({[(diphenylmethyl)sulfanyl]diphenylmethoxy}methanetriyl)tribenzene', AUTHOR => 1 }, ); @cases = grep { !exists $_->{AUTHOR} } @cases unless $ENV{AUTHOR_TESTING}; plan skip_all => 'No available cases' unless @cases; plan tests => scalar @cases; for my $case (@cases) { my $ok; eval { $ok = is ChemOnomatopist::get_name( $case->{smiles} ), $case->{iupac}, $case->{smiles} }; $@ =~ s/\n$// if $@; fail $case->{smiles} . ": $@" if $@; if( $case->{AUTHOR} && $ok ) { diag 'test supposed to fail with AUTHOR_TESTING' . ( $case->{AUTHOR} !~ /^1$/ ? ': ' . $case->{AUTHOR} : '' ); } } ChemOnomatopist-0.10.0/t/56_substituents.t000066400000000000000000000135761463750375500204520ustar00rootroot00000000000000#!/usr/bin/perl use strict; use warnings; use ChemOnomatopist; use Test::More; my @cases = ( # From BBv3 P-45.2.1 { smiles => 'COC1=CC=C(NC2=CC=CC=C2)C=C1', iupac => '4-methoxy-N-phenylaniline' }, { smiles => 'ClC1=CC(=C(C#N)C=C1)CC1=CC(=CC=C1)C#N', iupac => '4-chloro-2-[(3-cyanophenyl)methyl]benzonitrile' }, { smiles => 'CC1=CC=C(C=C1)COC1=CC=CC=C1', iupac => '1-methyl-4-(phenoxymethyl)benzene' }, { smiles => 'CN(C(C(CC1=CC(=C(C=C1)C)CC(C(=O)NC)C)C)=O)C', iupac => 'N,N,2-trimethyl-3-{4-methyl-3-[2-methyl-3-(methylamino)-3-oxopropyl]phenyl}propanamide', AUTHOR => 1 }, { smiles => 'CS1(CC(CCC1)CSC1C[SH2]CCC1)C', iupac => '1,1-dimethyl-3-{[(1λ4-thian-3-yl)sulfanyl]methyl}-1λ4-thiane', AUTHOR => 1 }, { smiles => 'C(C)C(C(C)C)CCC', iupac => '3-ethyl-2-methylhexane' }, { smiles => 'CC(CC(CCC(=O)O)CCC)C', iupac => '6-methyl-4-propylheptanoic acid' }, { smiles => 'BrC(C(C(C(=O)O)C(=C)C(CC)CBr)=C)(CC)C', iupac => '4-bromo-2-[3-(bromomethyl)pent-1-en-2-yl]-4-methyl-3-methylidenehexanoic acid', AUTHOR => 1 }, { smiles => '[SiH2]([SiH3])SS[SiH2][Si](C)(C)C', iupac => '2-(disilanyldisulfanyl)-1,1,1-trimethyldisilane', AUTHOR => 1 }, { smiles => 'C(=O)(O)C1=C(C=C(OC2=C(C(=C(C(=O)O)C=C2)P)[SH5])C=C1)[SH5]', iupac => '4-[4-carboxy-3-(λ6-sulfanyl)phenoxy]-2-phosphanyl-3-(λ6-sulfanyl)benzoic acid' }, { smiles => '[81Br]C(C(C(CC(=O)O)C(CC)[81Br])C)C', iupac => '5-(81Br)bromo-3-[1-(81Br)bromopropyl]-4-methylhexanoic acid' }, # From BBv3 P-45.2.2 { smiles => 'NC1=C(OC2=C(NC)C=CC=C2)C=CC(=C1)C', iupac => '2-(2-amino-4-methylphenoxy)-N-methylaniline', AUTHOR => 1 }, { smiles => 'BrC1=C(C(=CC2=CC(=CC=C12)[N+](=O)[O-])Cl)CCC1=C(C2=CC(=CC=C2C=C1F)F)F', iupac => '1-bromo-3-chloro-6-nitro-2-[2-(1,3,7-trifluoronaphthalen-2-yl)ethyl]naphthalene' }, { smiles => 'C(=O)(O)C1=CC=C(C=C1)C(C=1C=C(C(=O)O)C=CC1)(C=1C=C(C(=O)O)C=CC1)C1=CC=C(C=C1)C(=O)O', iupac => '3,3\'-[bis(4-carboxyphenyl)methylene]dibenzoic acid', AUTHOR => 1 }, { smiles => 'NC(C(CC=1C=CC(=C(C1)CCC(=O)NC)C)C)=O', iupac => '3-[5-(3-amino-2-methyl-3-oxopropyl)-2-methylphenyl]-N-methylpropanamide', AUTHOR => 1 }, { smiles => 'BrC(C(CCC(=O)O)C(CC[N+](=O)[O-])Cl)C(C)Br', iupac => '5,6-dibromo-4-(1-chloro-3-nitropropyl)heptanoic acid' }, { smiles => 'NC(CO)CCC(C(CCCO)C)CC(CCO)Cl', iupac => '2-amino-5-(2-chloro-4-hydroxybutyl)-6-methylnonane-1,9-diol' }, { smiles => 'CC(C(CC=C)C=C(C)C)=CC', iupac => '5-methyl-4-(2-methylprop-1-en-1-yl)hepta-1,5-diene' }, { smiles => 'BrC(C(C(CC(=O)O)C(CC[N+](=O)[O-])[PH4])[PH4])C', iupac => '5-bromo-3-[3-nitro-1-(λ5-phosphanyl)propyl]-4-(λ5-phosphanyl)hexanoic acid' }, { smiles => 'C(=O)(O)C1=CC(=C(OC2=CC(=C(C(=O)O)C=C2)[PH4])C=C1)P', iupac => '4-(4-carboxy-2-phosphanylphenoxy)-2-(λ5-phosphanyl)benzoic acid' }, { smiles => 'C(=O)(O)C1=CC(=C(OC2=CC(=C(C(=O)O)C=C2)[SH5])C=C1)[SH3]', iupac => '4-[4-carboxy-2-(λ4-sulfanyl)phenoxy]-2-(λ6-sulfanyl)benzoic acid' }, { smiles => 'C(=O)(O)C1=CC(=C(OC2=CC(=C(C(=O)O)C=C2)[SH5])C=C1)P', iupac => '4-(4-carboxy-2-phosphanylphenoxy)-2-(λ6-sulfanyl)benzoic acid' }, { smiles => 'N(C1C=C(C=C(C1)C)SC1=C(C(CC=C1)N([2H])[2H])C)([2H])[2H]', iupac => '3-{[3-(2H2)amino-5-methylcyclohexa-1,5-dien-1-yl]sulfanyl}-2-methylcyclohexa-2,4-dien-1-(2H2)amine', AUTHOR => 1 }, { smiles => '[81Br]C(C(CC(=O)O)CC(C)[81Br])CC', iupac => '4-(81Br)bromo-3-[2-(81Br)bromopropyl]hexanoic acid' }, { smiles => 'ClC(C(C)O)C(C(C(CC(C)O)C)CCC(C)O)C', iupac => '3-chloro-5-(3-hydroxybutyl)-4,6-dimethylnonane-2,8-diol' }, # From BBv3 P-45.2.3 { smiles => 'ClC=1C=NC2=CC(=CC=C2C1[N+](=O)[O-])SC1=CC=C2C(=C(C=NC2=C1)[N+](=O)[O-])Cl', iupac => '3-chloro-7-[(4-chloro-3-nitroquinolin-7-yl)sulfanyl]-4-nitroquinoline' }, { smiles => 'BrC1=C(NC2=C(C=C(C=C2)Br)Cl)C=CC(=C1)Cl', iupac => '2-bromo-N-(4-bromo-2-chlorophenyl)-4-chloroaniline' }, { smiles => 'C(C)C1=C(C=CC2=CC=C(C=C12)OC1=CC2=C(C(=CC=C2C=C1)CC)CCC)CCC', iupac => '1-ethyl-7-[(7-ethyl-8-propylnaphthalen-2-yl)oxy]-2-propylnaphthalene', AUTHOR => 1 }, { smiles => 'BrC(C(CCC(=O)O)C(C(C)Br)F)C(C)F', iupac => '5-bromo-4-(2-bromo-1-fluoropropyl)-6-fluoroheptanoic acid' }, { smiles => 'BrC(C(C(=O)O)C(CBr)O)CO', iupac => '3-bromo-2-(2-bromo-1-hydroxyethyl)-4-hydroxybutanoic acid', AUTHOR => 1 }, { smiles => 'BrCCC(CO)CCCl', iupac => '2-(2-bromoethyl)-4-chlorobutan-1-ol' }, { smiles => 'BrC(CCCC(CCl)CBr)Cl', iupac => '1-bromo-5-(bromomethyl)-1,6-dichlorohexane' }, { smiles => 'C(C)C(C(CC=CC=C)C(C)C(C=C)CC)C(C=C)C', iupac => '7-ethyl-6-(3-ethylpent-4-en-2-yl)-8-methyldeca-1,3,9-triene' }, { smiles => 'CNC(CCC1=CC(=C(C=C1)C)CCC(NCCC)=O)=O', iupac => 'N-methyl-3-{4-methyl-3-[3-oxo-3-(propylamino)propyl]phenyl}propanamide', AUTHOR => 1 }, { smiles => 'BrC(C([PH4])C(CC(=O)O)C(C(C)Cl)[PH4])C', iupac => '3-[2-bromo-1-(λ5-phosphanyl)propyl]-5-chloro-4-(λ5-phosphanyl)hexanoic acid' }, { smiles => '[81Br]C(C(CC(=O)O)C(C(C)Br)[81Br])C(C)Cl', iupac => '4-(81Br)bromo-3-[1-(81Br)bromo-2-bromopropyl]-5-chlorohexanoic acid' }, { smiles => 'C(CCC)C(CC(CC)C)C(CC(CC)CC)CCC', iupac => '5-butyl-8-ethyl-3-methyl-6-propyldecane' }, { smiles => 'C(C)C1=CC=C(C2=CC(=CC=C12)[Se]C1=CC2=C(C=CC(=C2C=C1)CCC)CC)CCC', iupac => '1-ethyl-6-[(8-ethyl-5-propylnaphthalen-2-yl)selanyl]-4-propylnaphthalene', AUTHOR => 1 }, { smiles => 'BrC(CCC(C(CCl)Br)C(CBr)I)Cl', iupac => '1,5-dibromo-4-(2-bromo-1-iodoethyl)-1,6-dichlorohexane' }, ); @cases = grep { !exists $_->{AUTHOR} } @cases unless $ENV{AUTHOR_TESTING}; plan skip_all => 'No available cases' unless @cases; plan tests => scalar @cases; for my $case (@cases) { my $ok; eval { $ok = is ChemOnomatopist::get_name( $case->{smiles} ), $case->{iupac}, $case->{smiles} }; $@ =~ s/\n$// if $@; fail $case->{smiles} . ": $@" if $@; if( $case->{AUTHOR} && $ok ) { diag 'test supposed to fail with AUTHOR_TESTING' . ( $case->{AUTHOR} !~ /^1$/ ? ': ' . $case->{AUTHOR} : '' ); } } ChemOnomatopist-0.10.0/t/57_polyfunctional.t000066400000000000000000000040501463750375500207300ustar00rootroot00000000000000#!/usr/bin/perl use strict; use warnings; use ChemOnomatopist; use Test::More; my @cases = ( # From BBv3 P-63.7 { smiles => 'CC(CS)(C)OS', iupac => '2-methyl-2-(sulfanyloxy)propane-1-thiol' }, { smiles => 'OCCC1=C(C=CC=C1)O', iupac => '2-(2-hydroxyethyl)phenol' }, { smiles => 'OC1=C(C=CC=C1)C(CO)O', iupac => '1-(2-hydroxyphenyl)ethane-1,2-diol', AUTHOR => 1 }, { smiles => '[SeH]OCCOO', iupac => '2-(selanyloxy)ethane-1-peroxol' }, { smiles => 'NCC(C)(OO)C', iupac => '1-amino-2-methylpropane-2-peroxol' }, { smiles => 'CS(=O)(=O)CCO', iupac => '2-(methanesulfonyl)ethan-1-ol' }, { smiles => 'O(O)C1C(CCCC1)(O)OOC1C(CCCC1)=O', iupac => '2-[(2-hydroperoxy-1-hydroxycyclohexyl)peroxy]cyclohexan-1-one' }, { smiles => 'NCCO', iupac => '2-aminoethan-1-ol' }, { smiles => 'C(C)OOCCOC', iupac => '1-(ethylperoxy)-2-methoxyethane', AUTHOR => 1 }, { smiles => 'COCCCSC', iupac => '1-methoxy-3-(methylsulfanyl)propane', AUTHOR => 1 }, { smiles => 'CSSC(=CCCC)SC', iupac => '1-(methyldisulfanyl)-1-(methylsulfanyl)pent-1-ene', AUTHOR => 1 }, { smiles => 'CO[Si](CCCS)(OC)OC', iupac => '3-(trimethoxysilyl)propane-1-thiol', AUTHOR => 1 }, { smiles => 'C(C)SC=C(SCCC)SCCC', iupac => '1-{[2-(ethylsulfanyl)-1-(propylsulfanyl)ethen-1-yl]sulfanyl}propane', AUTHOR => 1 }, { smiles => 'CC(CC)N(C(C)(CC)O)C(C)CC', iupac => '2-[di(butan-2-yl)amino]butan-2-ol' }, { smiles => 'N[SiH](C1(CC(CC1)O)[SiH2]CN)C', iupac => '3-[amino(methyl)silyl]-3-[(aminomethyl)silyl]cyclopentan-1-ol', AUTHOR => 1 }, ); @cases = grep { !exists $_->{AUTHOR} } @cases unless $ENV{AUTHOR_TESTING}; plan skip_all => 'No available cases' unless @cases; plan tests => scalar @cases; for my $case (@cases) { my $ok; eval { $ok = is ChemOnomatopist::get_name( $case->{smiles} ), $case->{iupac}, $case->{smiles} }; $@ =~ s/\n$// if $@; fail $case->{smiles} . ": $@" if $@; if( $case->{AUTHOR} && $ok ) { diag 'test supposed to fail with AUTHOR_TESTING' . ( $case->{AUTHOR} !~ /^1$/ ? ': ' . $case->{AUTHOR} : '' ); } } ChemOnomatopist-0.10.0/t/58_alphanumeric.t000066400000000000000000000026141463750375500203370ustar00rootroot00000000000000#!/usr/bin/perl use strict; use warnings; use ChemOnomatopist; use Test::More; my @cases = ( # From BBv3 P-45.5 { smiles => 'BrC1=C(C=C(C2=CC=CC=C12)Cl)CCOCC1=C(C2=CC=CC=C2C(=C1)Br)Br', iupac => '1-bromo-4-chloro-2-{2-[(1,4-dibromonaphthalen-2-yl)methoxy]ethyl}naphthalene', AUTHOR => 1 }, { smiles => 'BrC1=C(NC2=C(C=C(C=C2)Br)Br)C=CC(=C1)Cl', iupac => '2-bromo-4-chloro-N-(2,4-dibromophenyl)aniline', AUTHOR => 1 }, # { smiles => '', iupac => '13-bromo-14-chloro-2-(3,4-dibromonaphthalen-2-yl)-1(2)-naphthalena-3,5(1,4),7(1)-tribenzenaheptaphane' }, # FIXME: Not parsed by OPSIN { smiles => 'FC(C(C)F)C(CCC(=O)O)C(C(C)[N+](=O)[O-])[N+](=O)[O-]', iupac => '4-(1,2-difluoropropyl)-5,6-dinitroheptanoic acid' }, { smiles => '[81Br]C(C(C(CC(=O)O)C(C)C(C)[81Br])[N+](=O)[O-])C', iupac => '5-(81Br)bromo-3-[3-(81Br)bromobutan-2-yl]-4-nitrohexanoic acid' }, ); @cases = grep { !exists $_->{AUTHOR} } @cases unless $ENV{AUTHOR_TESTING}; plan skip_all => 'No available cases' unless @cases; plan tests => scalar @cases; for my $case (@cases) { my $ok; eval { $ok = is ChemOnomatopist::get_name( $case->{smiles} ), $case->{iupac}, $case->{smiles} }; $@ =~ s/\n$// if $@; fail $case->{smiles} . ": $@" if $@; if( $case->{AUTHOR} && $ok ) { diag 'test supposed to fail with AUTHOR_TESTING' . ( $case->{AUTHOR} !~ /^1$/ ? ': ' . $case->{AUTHOR} : '' ); } } ChemOnomatopist-0.10.0/t/59_ABA.t000066400000000000000000000024451463750375500162550ustar00rootroot00000000000000#!/usr/bin/perl use strict; use warnings; use ChemOnomatopist; use Test::More; my @cases = ( # From BBv3 P-21.2.3.1 { smiles => '[SnH3]O[SnH2]O[SnH2]O[SnH3]', iupac => 'tetrastannoxane' }, { smiles => '[SiH3]S[SiH3]', iupac => 'disilathiane' }, { smiles => 'SOS', iupac => 'dithioxane' }, { smiles => 'P[Se]P', iupac => 'diphosphaselenane' }, { smiles => '[SiH3]N[SiH3]', iupac => 'N-silylsilanamine', AUTHOR => 1 }, # From BBv3 P-68.1.1.2.2 { smiles => 'CB(OB(C)C)C', iupac => 'tetramethyldiboroxane' }, { smiles => '[AlH2]O[AlH]O[AlH2]', iupac => 'trialuminoxane' }, { smiles => 'CBNBC', iupac => '1-methyl-N-(methylboranyl)boranamine', AUTHOR => 1 }, { smiles => 'CSOS', iupac => 'methyldithioxane' }, # From BBv3 P-68.4.2.1 ); @cases = grep { !exists $_->{AUTHOR} } @cases unless $ENV{AUTHOR_TESTING}; plan skip_all => 'No available cases' unless @cases; plan tests => scalar @cases; for my $case (@cases) { my $ok; eval { $ok = is ChemOnomatopist::get_name( $case->{smiles} ), $case->{iupac}, $case->{smiles} }; $@ =~ s/\n$// if $@; fail $case->{smiles} . ": $@" if $@; if( $case->{AUTHOR} && $ok ) { diag 'test supposed to fail with AUTHOR_TESTING' . ( $case->{AUTHOR} !~ /^1$/ ? ': ' . $case->{AUTHOR} : '' ); } } ChemOnomatopist-0.10.0/t/60_aceylene.t000066400000000000000000000016041463750375500174430ustar00rootroot00000000000000#!/usr/bin/perl use strict; use warnings; use ChemOnomatopist; use Test::More; my @cases = ( # From BBv3 P-25.1.2.7 { smiles => 'C1=CC2=CC=CC3=CC=CC1=C23', iupac => 'acenaphthylene' }, { smiles => 'C1=CC2=CC=CC3=CC4=CC=CC=C4C1=C23', iupac => 'aceanthrylene' }, { smiles => 'C1=CC=C2C=CC3=CC4=CC=CC=C4C1=C23', iupac => 'acephenanthrylene' }, ); @cases = grep { !exists $_->{AUTHOR} } @cases unless $ENV{AUTHOR_TESTING}; plan skip_all => 'No available cases' unless @cases; plan tests => scalar @cases; for my $case (@cases) { my $ok; eval { $ok = is ChemOnomatopist::get_name( $case->{smiles} ), $case->{iupac}, $case->{smiles} }; $@ =~ s/\n$// if $@; fail $case->{smiles} . ": $@" if $@; if( $case->{AUTHOR} && $ok ) { diag 'test supposed to fail with AUTHOR_TESTING' . ( $case->{AUTHOR} !~ /^1$/ ? ': ' . $case->{AUTHOR} : '' ); } } ChemOnomatopist-0.10.0/t/61_zwitterionic.t000066400000000000000000000012241463750375500204070ustar00rootroot00000000000000#!/usr/bin/perl use strict; use warnings; use ChemOnomatopist; use Test::More; my @cases = ( { smiles => 'C(C)#[N+][N-]C', iupac => '2-ethylidyne-1-methylhydrazin-2-ium-1-ide' }, # From BBv3 P-74.2.2.2.1.1 ); @cases = grep { !exists $_->{AUTHOR} } @cases unless $ENV{AUTHOR_TESTING}; plan skip_all => 'No available cases' unless @cases; plan tests => scalar @cases; for my $case (@cases) { my $ok; eval { $ok = is ChemOnomatopist::get_name( $case->{smiles} ), $case->{iupac}, $case->{smiles} }; $@ =~ s/\n$// if $@; fail $case->{smiles} . ": $@" if $@; diag 'test supposed to fail with AUTHOR_TESTING' if $case->{AUTHOR} && $ok; } ChemOnomatopist-0.10.0/t/62_hydro.t000066400000000000000000000065751463750375500170210ustar00rootroot00000000000000#!/usr/bin/perl use strict; use warnings; use ChemOnomatopist; use Test::More; my @cases = ( # BBv3 P-31.2.2 { smiles => 'C1(=CC=CC2=CC=CC=C12)C1CC2=CC=CC=C2CC1', iupac => '1\',2\',3\',4\'-tetrahydro-1,2\'-binaphthalene', AUTHOR => 1 }, { smiles => 'N1=CCCCC=C1', iupac => '4,5-dihydro-3H-azepine' }, { smiles => 'N=1CCCC1', iupac => '3,4-dihydro-2H-pyrrole' }, { smiles => 'C1#CC=CC=C1', iupac => '1,2-didehydrobenzene', AUTHOR => 1 }, # BBv3 P-31.2.3.1 { smiles => 'C1=CCC=CC1', iupac => 'cyclohexa-1,4-diene' }, { smiles => 'C1=CCCCC1', iupac => 'cyclohexene' }, { smiles => 'N1CC=CC=C1', iupac => '1,2-dihydropyridine' }, { smiles => 'S1C=CNCCC1', iupac => '4,5,6,7-tetrahydro-1,4-thiazepine' }, { smiles => 'N1CC=CC=CC1', iupac => '2,7-dihydro-1H-azepine' }, { smiles => 'P1CCC=C1', iupac => '2,3-dihydro-1H-phosphole', AUTHOR => 1 }, # BBv3 P-31.2.3.2 { smiles => 'P1C=CC=C1', iupac => '1H-phosphole', AUTHOR => 1 }, { smiles => 'P1CCCC1', iupac => 'phospholane' }, { smiles => '[SiH]1=CC=CC=C1', iupac => 'siline' }, { smiles => '[SiH2]1CCCCC1', iupac => 'silinane' }, { smiles => 'O1C=CC=C1', iupac => 'furan' }, { smiles => 'O1CCCC1', iupac => 'oxolane' }, { smiles => 'N1=CC=CC=C1', iupac => 'pyridine' }, { smiles => 'N1CCCCC1', iupac => 'piperidine' }, { smiles => 'O1CC=CC=C1', iupac => '2H-pyran' }, { smiles => 'O1CCCCC1', iupac => 'oxane' }, { smiles => 'S1C=CN=CC=C1', iupac => '1,4-thiazepine' }, { smiles => 'S1CCNCCC1', iupac => '1,4-thiazepane' }, # BBv3 P-31.2.3.3.1 { smiles => 'C1CCC2=CC=CC=C12', iupac => '2,3-dihydro-1H-indene' }, { smiles => 'N1CCC2=CC=CC=C12', iupac => '2,3-dihydro-1H-indole' }, { smiles => 'C1NCC2=CC=CC=C12', iupac => '2,3-dihydro-1H-isoindole' }, { smiles => 'O1CCCC2=C1C=CC=C2', iupac => '3,4-dihydro-2H-1-benzopyran' }, { smiles => 'S1CCCC2=C1C=CC=C2', iupac => '3,4-dihydro-2H-1-benzothiopyran' }, { smiles => '[Se]1CCCC2=C1C=CC=C2', iupac => '3,4-dihydro-2H-1-benzoselenopyran' }, { smiles => '[Te]1CCCC2=C1C=CC=C2', iupac => '3,4-dihydro-2H-1-benzotelluropyran' }, { smiles => 'C1OCCC2=C1C=CC=C2', iupac => '3,4-dihydro-1H-2-benzopyran' }, { smiles => 'C1SCCC2=C1C=CC=C2', iupac => '3,4-dihydro-1H-2-benzothiopyran' }, { smiles => 'C1[Se]CCC2=C1C=CC=C2', iupac => '3,4-dihydro-1H-2-benzoselenopyran' }, { smiles => 'C1[Te]CCC2=C1C=CC=C2', iupac => '3,4-dihydro-1H-2-benzotelluropyran' }, # BBv3 P-31.2.3.3.2 { smiles => 'C1C=CCC2=CC=CC=C12', iupac => '1,4-dihydronaphthalene' }, { smiles => 'C1=CC=CC2=C1C=CCCC2', iupac => '6,7-dihydro-5H-benzo[7]annulene', AUTHOR => 1 }, { smiles => 'C1CCCC2CCCCC12', iupac => 'decahydronaphthalene' }, { smiles => 'C1CCCC2CC3CCCCC3CC12', iupac => 'tetradecahydroanthracene' }, { smiles => 'C1CCC2CC3C1C1C4C5C(C(C3CCC2)C1)CCCC(C5)CCC4', iupac => 'octadecahydro-7,14-methano-4,6:8,10-dipropanodicyclohepta[a,d][8]annulene', AUTHOR => 1 }, ); @cases = grep { !exists $_->{AUTHOR} } @cases unless $ENV{AUTHOR_TESTING}; plan skip_all => 'No available cases' unless @cases; plan tests => scalar @cases; for my $case (@cases) { my $ok; eval { $ok = is ChemOnomatopist::get_name( $case->{smiles} ), $case->{iupac}, $case->{smiles} }; $@ =~ s/\n$// if $@; fail $case->{smiles} . ": $@" if $@; diag 'test supposed to fail with AUTHOR_TESTING' if $case->{AUTHOR} && $ok; } ChemOnomatopist-0.10.0/t/63_sulfonic_acids.t000066400000000000000000000111601463750375500206440ustar00rootroot00000000000000#!/usr/bin/perl use strict; use warnings; use ChemOnomatopist; use Test::More; my @cases = ( # From BBv3 Table 4.3. Suffixes were turned to full names by prepending them with 'methane' { smiles => 'CS(=O)(=O)O', iupac => 'methanesulfonic acid' }, { smiles => 'CS(=O)(=O)OO', iupac => 'methanesulfonoperoxoic acid' }, { smiles => 'CS(=O)(=S)OO', iupac => 'methanesulfonoperoxothioic OO-acid' }, { smiles => 'CS(=O)(=[Se])OO', iupac => 'methanesulfonoperoxoselenoic OO-acid' }, { smiles => 'CS(=O)(=O)SO', iupac => 'methanesulfono(thioperoxoic) SO-acid' }, { smiles => 'CS(=O)(=O)OS', iupac => 'methanesulfono(thioperoxoic) OS-acid', AUTHOR => 1 }, { smiles => 'CS(=S)(=S)OO', iupac => 'methanesulfonoperoxodithioic OO-acid' }, { smiles => 'CS(=O)(=S)SO', iupac => 'methanesulfonothio(thioperoxoic) SO-acid' }, { smiles => 'CS(=S)(=[Se])OO', iupac => 'methanesulfonoperoxoselenothioic OO-acid' }, { smiles => 'CS(=[Se])(=[Se])SS', iupac => 'methanesulfono(dithioperoxo)diselenoic acid', AUTHOR => 1 }, { smiles => 'CS(=S)(=S)[Se][SeH]', iupac => 'methanesulfono(diselenoperoxo)dithioic acid' }, { smiles => 'CS(=[Te])(=[Te])[Te][TeH]', iupac => 'methanesulfono(ditelluroperoxo)ditelluroic acid' }, { smiles => 'CS(=O)(O)=S', iupac => 'methanesulfonothioic O-acid' }, { smiles => 'CS(=O)(S)=O', iupac => 'methanesulfonothioic S-acid' }, { smiles => 'CS(=O)([SeH])=O', iupac => 'methanesulfonoselenoic Se-acid' }, { smiles => 'CS(O)(=S)=S', iupac => 'methanesulfonodithioic O-acid' }, { smiles => 'CS(S)(=S)=O', iupac => 'methanesulfonodithioic S-acid' }, { smiles => 'CS(O)(=[Se])=[Te]', iupac => 'methanesulfonoselenotelluroic O-acid' }, { smiles => 'CS([SeH])(=O)=[Te]', iupac => 'methanesulfonoselenotelluroic Se-acid' }, { smiles => 'CS([TeH])(=[Se])=O', iupac => 'methanesulfonoselenotelluroic Te-acid' }, { smiles => 'CS(=S)(=S)S', iupac => 'methanesulfonotrithioic acid' }, { smiles => 'CS(=O)(O)=N', iupac => 'methanesulfonimidic acid' }, { smiles => 'CS(=O)(=N)OO', iupac => 'methanesulfonimidoperoxoic acid' }, { smiles => 'CS(=S)(=N)OO', iupac => 'methanesulfonimidoperoxothioic OO-acid' }, { smiles => 'CS(=O)(=N)SO', iupac => 'methanesulfonimido(thioperoxoic) SO-acid' }, { smiles => 'CS(=O)(=N)OS', iupac => 'methanesulfonimido(thioperoxoic) OS-acid', AUTHOR => 1 }, { smiles => 'CS(O)(=N)=S', iupac => 'methanesulfonimidothioic O-acid' }, { smiles => 'CS(S)(=N)=O', iupac => 'methanesulfonimidothioic S-acid' }, { smiles => 'CS(=N)(=S)S', iupac => 'methanesulfonimidodithioic acid' }, { smiles => 'CS(=N)(=[Se])S', iupac => 'methanesulfonimidoselenothioic S-acid' }, { smiles => 'CS(=N)(=S)[SeH]', iupac => 'methanesulfonimidoselenothioic Se-acid' }, { smiles => 'CS(=N)(=[Te])[TeH]', iupac => 'methanesulfonimidoditelluroic acid' }, { smiles => 'CS(O)(=N)=N', iupac => 'methanesulfonodiimidic acid' }, { smiles => 'CS(=N)(=N)OO', iupac => 'methanesulfonodiimidoperoxoic acid' }, { smiles => 'CS(=N)(=N)SO', iupac => 'methanesulfonodiimido(thioperoxoic) SO-acid' }, { smiles => 'CS(=N)(=N)OS', iupac => 'methanesulfonodiimido(thioperoxoic) OS-acid', AUTHOR => 1 }, { smiles => 'CS(=N)(=N)[SeH]', iupac => 'methanesulfonodiimidoselenoic acid' }, { smiles => 'CS(=N)(=N)[TeH]', iupac => 'methanesulfonodiimidotelluroic acid' }, { smiles => 'CS(=O)(O)=NN', iupac => 'methanesulfonohydrazonic acid', AUTHOR => 1 }, { smiles => 'CS(=O)(=NN)OO', iupac => 'methanesulfonohydrazonoperoxoic acid', AUTHOR => 1 }, { smiles => 'CS(=NN)(OO)=S', iupac => 'methanesulfonohydrazonoperoxothioic acid', AUTHOR => 1 }, { smiles => 'CS(O)(=NN)=S', iupac => 'methanesulfonohydrazonothioic O-acid', AUTHOR => 1 }, { smiles => 'CS(S)(=NN)=O', iupac => 'methanesulfonohydrazonothioic S-acid', AUTHOR => 1 }, { smiles => 'CS(=NN)(=NN)O', iupac => 'methanesulfonodihyrazonic acid', AUTHOR => 1 }, { smiles => 'CS(=NN)(=NN)OO', iupac => 'methanesulfonodihydrazonoperoxoic acid', AUTHOR => 1 }, { smiles => 'CS(=NN)(=NN)SO', iupac => 'methanesulfonodihydrazono(thioperoxoic) SO-acid', AUTHOR => 1 }, { smiles => 'CS(=NN)(=NN)S', iupac => 'methanesulfonodihydrazonothioic acid', AUTHOR => 1 }, ); @cases = grep { !exists $_->{AUTHOR} } @cases unless $ENV{AUTHOR_TESTING}; plan skip_all => 'No available cases' unless @cases; plan tests => scalar @cases; for my $case (@cases) { my $ok; eval { $ok = is ChemOnomatopist::get_name( $case->{smiles} ), $case->{iupac}, $case->{smiles} }; $@ =~ s/\n$// if $@; fail $case->{smiles} . ": $@" if $@; diag 'test supposed to fail with AUTHOR_TESTING' if $case->{AUTHOR} && $ok; } ChemOnomatopist-0.10.0/t/64_carboxylic_acids.t000066400000000000000000000104511463750375500211640ustar00rootroot00000000000000#!/usr/bin/perl use strict; use warnings; use ChemOnomatopist; use Test::More; my @cases = ( # From BBv3 Table 4.3. Suffixes were turned to full names by prepending them with 'benzene' { smiles => 'C1(=CC=CC=C1)C(=O)O', iupac => 'benzenecarboxylic acid', AUTHOR => 1 }, { smiles => 'C1(=CC=CC=C1)C(=O)OO', iupac => 'benzenecarboperoxoic acid', AUTHOR => 1 }, { smiles => 'C1(=CC=CC=C1)C(=S)OO', iupac => 'benzenecarboperoxothioic OO-acid', AUTHOR => 1 }, { smiles => 'C1(=CC=CC=C1)C(=[Se])OO', iupac => 'benzenecarboperoxoselenoic acid' }, { smiles => 'C1(=CC=CC=C1)C(=O)SO', iupac => 'benzenecarbo(thioperoxoic) SO-acid' }, { smiles => 'C1(=CC=CC=C1)C(=O)OS', iupac => 'benzenecarbo(thioperoxoic) OS-acid', AUTHOR => 1 }, { smiles => 'C1(=CC=CC=C1)C(=O)[Se]O', iupac => 'benzenecarbo(selenoperoxoic) SeO-acid' }, { smiles => 'C1(=CC=CC=C1)C(=O)O[SeH]', iupac => 'benzenecarbo(selenoperoxoic) OSe-acid', AUTHOR => 1 }, { smiles => 'C1(=CC=CC=C1)C(=S)SO', iupac => 'benzenecarbothio(thioperoxoic) SO-acid', AUTHOR => 1 }, { smiles => 'C1(=CC=CC=C1)C(=S)OS', iupac => 'benzenecarbothio(thioperoxoic) OS-acid', AUTHOR => 1 }, { smiles => 'C1(=CC=CC=C1)C(=[Se])OS', iupac => 'benzenecarboseleno(thioperoxoic) OS-acid', AUTHOR => 1 }, { smiles => 'C1(=CC=CC=C1)C(=S)[Se]O', iupac => 'benzenecarbo(selenoperoxo)thioic SeO-acid' }, { smiles => 'C1(=CC=CC=C1)C(=S)O[SeH]', iupac => 'benzenecarbo(selenoperoxo)thioic OSe-acid' }, { smiles => 'C1(=CC=CC=C1)C(=S)SS', iupac => 'benzenecarbo(dithioperoxo)thioic acid' }, { smiles => 'C1(=CC=CC=C1)C(=[Se])[Se][SeH]', iupac => 'benzenecarbo(diselenoperoxo)selenoic acid' }, { smiles => 'C1(=CC=CC=C1)C(=[Te])[Te][TeH]', iupac => 'benzenecarbo(ditelluroperoxo)telluroic acid' }, { smiles => 'C1(=CC=CC=C1)C(O)=S', iupac => 'benzenecarbothioic O-acid' }, { smiles => 'C1(=CC=CC=C1)C(S)=O', iupac => 'benzenecarbothioic S-acid' }, { smiles => 'C1(=CC=CC=C1)C(=S)S', iupac => 'benzenecarbodithioic acid' }, { smiles => 'C1(=CC=CC=C1)C(=[Se])S', iupac => 'benzenecarboselenothioic S-acid' }, { smiles => 'C1(=CC=CC=C1)C(=S)[SeH]', iupac => 'benzenecarboselenothioic Se-acid' }, { smiles => 'C1(=CC=CC=C1)C(=[Se])[SeH]', iupac => 'benzenecarbodiselenoic acid' }, { smiles => 'C1(=CC=CC=C1)C(=[Te])[SeH]', iupac => 'benzenecarboselenotelluroic Se-acid' }, { smiles => 'C1(=CC=CC=C1)C(=[Te])[TeH]', iupac => 'benzenecarboditelluroic acid' }, { smiles => 'C1(=CC=CC=C1)C(O)=N', iupac => 'benzenecarboximidic acid', AUTHOR => 1 }, { smiles => 'C1(=CC=CC=C1)C(=N)OO', iupac => 'benzenecarboximidoperoxoic acid', AUTHOR => 1 }, { smiles => 'C1(=CC=CC=C1)C(=N)SO', iupac => 'benzenecarboximido(thioperoxoic) SO-acid', AUTHOR => 1 }, { smiles => 'C1(=CC=CC=C1)C(=N)OS', iupac => 'benzenecarboximido(thioperoxoic) OS-acid', AUTHOR => 1 }, { smiles => 'C1(=CC=CC=C1)C(=N)SS', iupac => 'benzenecarbo(dithioperoxo)imidic acid', AUTHOR => 1 }, { smiles => 'C1(=CC=CC=C1)C(=N)[Se]S', iupac => 'benzenecarboximido(selenothioperoxoic) SeS-acid', AUTHOR => 1 }, { smiles => 'C1(=CC=CC=C1)C(=N)S', iupac => 'benzenecarboximidothioic acid', AUTHOR => 1 }, { smiles => 'C1(=CC=CC=C1)C(=N)[SeH]', iupac => 'benzenecarboximidoselenoic acid', AUTHOR => 1 }, { smiles => 'C1(=CC=CC=C1)C(=N)[TeH]', iupac => 'benzenecarboximidotelluroic acid', AUTHOR => 1 }, { smiles => 'C1(=CC=CC=C1)C(O)=NN', iupac => 'benzenecarbohydrazonic acid', AUTHOR => 1 }, { smiles => 'C1(=CC=CC=C1)C(=NN)OO', iupac => 'benzenecarbohydrazonoperoxoic acid', AUTHOR => 1 }, { smiles => 'C1(=CC=CC=C1)C(=NN)SO', iupac => 'benzenecarbohydrazono(thioperoxoic) SO-acid', AUTHOR => 1 }, { smiles => 'C1(=CC=CC=C1)C(=NN)OS', iupac => 'benzenecarbohydrazono(thioperoxoic) OS-acid', AUTHOR => 1 }, { smiles => 'C1(=CC=CC=C1)C(=NN)[Te][TeH]', iupac => 'benzenecarbo(ditelluroperoxo)hydrazonic acid', AUTHOR => 1 }, ); @cases = grep { !exists $_->{AUTHOR} } @cases unless $ENV{AUTHOR_TESTING}; plan skip_all => 'No available cases' unless @cases; plan tests => scalar @cases; for my $case (@cases) { my $ok; eval { $ok = is ChemOnomatopist::get_name( $case->{smiles} ), $case->{iupac}, $case->{smiles} }; $@ =~ s/\n$// if $@; fail $case->{smiles} . ": $@" if $@; diag 'test supposed to fail with AUTHOR_TESTING' if $case->{AUTHOR} && $ok; } ChemOnomatopist-0.10.0/t/65_noncarbon_oxoacids.t000066400000000000000000000066631463750375500215450ustar00rootroot00000000000000#!/usr/bin/perl use strict; use warnings; use ChemOnomatopist; use Test::More; my @cases = ( # From BBv3 P-67.1.1.1 { smiles => '[AsH2](O)=O', iupac => 'arsinic acid' }, { smiles => '[AsH2]O', iupac => 'arsinous acid' }, { smiles => '[AsH](O)(O)=O', iupac => 'arsonic acid' }, { smiles => '[AsH](O)O', iupac => 'arsonous acid' }, { smiles => '[As](O)(O)(O)=O', iupac => 'arsoric acid' }, { smiles => '[As](O)(O)O', iupac => 'arsorous acid' }, { smiles => '[NH2+](O)[O-]', iupac => 'azinic acid' }, { smiles => '[NH+](O)(O)[O-]', iupac => 'azonic acid' }, { smiles => 'N(O)O', iupac => 'azonous acid', AUTHOR => 1 }, { smiles => '[N+](O)(O)(O)[O-]', iupac => 'nitroric acid' }, { smiles => 'N(O)(O)O', iupac => 'azorous acid', AUTHOR => 1 }, { smiles => 'B(O)(O)O', iupac => 'boric acid' }, { smiles => 'BO', iupac => 'borinic acid' }, { smiles => 'B(O)O', iupac => 'boronic acid' }, { smiles => 'Br(=O)(=O)O', iupac => 'bromic acid' }, { smiles => 'Br(=O)O', iupac => 'bromous acid', AUTHOR => 1 }, { smiles => 'Cl(=O)(=O)O', iupac => 'chloric acid' }, { smiles => 'Cl(=O)O', iupac => 'chlorous acid', AUTHOR => 1 }, { smiles => 'BrO', iupac => 'hypobromous acid' }, { smiles => 'ClO', iupac => 'hypochlorous acid' }, { smiles => 'FO', iupac => 'hypofluorous acid' }, { smiles => 'IO', iupac => 'hypoiodous acid' }, { smiles => 'I(=O)(=O)O', iupac => 'iodic acid' }, { smiles => 'I(=O)O', iupac => 'iodous acid', AUTHOR => 1 }, { smiles => '[N+](=O)(O)[O-]', iupac => 'nitric acid', AUTHOR => 1 }, { smiles => 'N(=O)O', iupac => 'nitrous acid', AUTHOR => 1 }, { smiles => 'Br(=O)(=O)(=O)O', iupac => 'perbromic acid' }, { smiles => 'Cl(=O)(=O)(=O)O', iupac => 'perchloric acid' }, { smiles => 'F(=O)(=O)(=O)O', iupac => 'perfluoric acid' }, { smiles => 'I(=O)(=O)(=O)O', iupac => 'periodic acid' }, { smiles => '[PH2](O)=O', iupac => 'phosphinic acid' }, { smiles => 'PO', iupac => 'phosphinous acid' }, { smiles => 'P(O)(O)=O', iupac => 'phosphonic acid' }, { smiles => 'P(O)O', iupac => 'phosphonous acid' }, { smiles => 'P(O)(O)(O)=O', iupac => 'phosphoric acid' }, { smiles => 'P(O)(O)O', iupac => 'phosphorous acid' }, { smiles => '[Se](O)(O)(=O)=O', iupac => 'selenic acid' }, { smiles => '[Se](O)(O)=O', iupac => 'selenous acid' }, { smiles => '[Si](O)(O)(O)O', iupac => 'silicic acid' }, { smiles => '[SbH2](O)=O', iupac => 'stibinic acid' }, { smiles => '[SbH2]O', iupac => 'stibinous acid' }, { smiles => '[SbH](O)(O)=O', iupac => 'stibonic acid' }, { smiles => '[SbH](O)O', iupac => 'stibonous acid' }, { smiles => '[Sb](O)(O)(O)=O', iupac => 'stiboric acid' }, { smiles => '[Sb](O)(O)O', iupac => 'stiborous acid' }, { smiles => 'S(O)(O)(=O)=O', iupac => 'sulfuric acid' }, { smiles => 'S(O)(O)=O', iupac => 'sulfurous acid' }, { smiles => '[Te](O)(O)(=O)=O', iupac => 'telluric acid' }, { smiles => '[Te](O)(O)=O', iupac => 'tellurous acid' }, ); @cases = grep { !exists $_->{AUTHOR} } @cases unless $ENV{AUTHOR_TESTING}; plan skip_all => 'No available cases' unless @cases; plan tests => scalar @cases; for my $case (@cases) { my $ok; eval { $ok = is ChemOnomatopist::get_name( $case->{smiles} ), $case->{iupac}, $case->{smiles} }; $@ =~ s/\n$// if $@; fail $case->{smiles} . ": $@" if $@; diag 'test supposed to fail with AUTHOR_TESTING' if $case->{AUTHOR} && $ok; } ChemOnomatopist-0.10.0/t/66_chalcogen_chains.t000066400000000000000000000025161463750375500211370ustar00rootroot00000000000000#!/usr/bin/perl use strict; use warnings; use ChemOnomatopist; use Test::More; my @cases = ( # From BBv3 P-68.4.2.2 { smiles => 'COOS', iupac => 'methyldioxidanethiol', AUTHOR => 1 }, { smiles => 'COOOS', iupac => 'methyltrioxidanethiol' }, { smiles => '', iupac => 'methyloxidane-SO-thioperoxol', AUTHOR => 1 }, { smiles => 'CSSO', iupac => 'methyldisulfanol', AUTHOR => 1 }, { smiles => 'CSSS[SeH]', iupac => 'methyltrisulfaneselenol' }, { smiles => 'COOSS', iupac => 'methyldioxidanedithioperoxol', AUTHOR => 1 }, { smiles => 'COS[Se][SeH]', iupac => 'methoxysulfanediselenoperoxol', AUTHOR => 1 }, { smiles => 'CSSOO', iupac => 'methyldisulfaneperoxol', AUTHOR => 1 }, { smiles => 'CSOSO', iupac => '[(methylsulfanyl)oxy]sulfanol', AUTHOR => 1 }, { smiles => '[Se](O[Se]C1=CC=C(C=C1)O)C1=CC=C(C=C1)O', iupac => '4,4\'-diselenoxanediyldiphenol', AUTHOR => 1 }, ); @cases = grep { !exists $_->{AUTHOR} } @cases unless $ENV{AUTHOR_TESTING}; plan skip_all => 'No available cases' unless @cases; plan tests => scalar @cases; for my $case (@cases) { my $ok; eval { $ok = is ChemOnomatopist::get_name( $case->{smiles} ), $case->{iupac}, $case->{smiles} }; $@ =~ s/\n$// if $@; fail $case->{smiles} . ": $@" if $@; diag 'test supposed to fail with AUTHOR_TESTING' if $case->{AUTHOR} && $ok; } ChemOnomatopist-0.10.0/t/67_sulfinamides_and_sulfonamides.t000066400000000000000000000017601463750375500237460ustar00rootroot00000000000000#!/usr/bin/perl use strict; use warnings; use ChemOnomatopist; use Test::More; my @cases = ( # From BBv3 P-66.1.1.2 { smiles => 'CS(=O)(=O)N', iupac => 'methanesulfonamide' }, { smiles => 'CC(CC)S(=O)N', iupac => 'butane-2-sulfinamide' }, { smiles => 'O1C(=CC=C1)[Se](=O)N', iupac => 'furan-2-seleninamide' }, { smiles => 'N1(CCCC1)S(=O)(=O)N', iupac => 'pyrrolidine-1-sulfonamide' }, # From BBv3 P-66.1.1.4.2 { smiles => 'CNS(=O)C=1C=C2C=CC(=CC2=CC1)C(=O)O', iupac => '6-[(methylamino)sulfinyl]naphthalene-2-carboxylic acid' }, ); @cases = grep { !exists $_->{AUTHOR} } @cases unless $ENV{AUTHOR_TESTING}; plan skip_all => 'No available cases' unless @cases; plan tests => scalar @cases; for my $case (@cases) { my $ok; eval { $ok = is ChemOnomatopist::get_name( $case->{smiles} ), $case->{iupac}, $case->{smiles} }; $@ =~ s/\n$// if $@; fail $case->{smiles} . ": $@" if $@; diag 'test supposed to fail with AUTHOR_TESTING' if $case->{AUTHOR} && $ok; }