pax_global_header00006660000000000000000000000064145201211610014503gustar00rootroot0000000000000052 comment=bf332547a10370e4a48b779168a0e4ba81c6b813 ChemOnomatopist-0.6.1/000077500000000000000000000000001452012116100146205ustar00rootroot00000000000000ChemOnomatopist-0.6.1/.gitignore000066400000000000000000000000731452012116100166100ustar00rootroot00000000000000.build ChemOnomatopist-* container.sif nytprof nytprof.out ChemOnomatopist-0.6.1/Changes000066400000000000000000000025611452012116100161170ustar00rootroot000000000000000.6.1 2023-10-31 - Implement partial support for porphyrins. - Implement support for anilino groups. - Simplify detection of ring membership. - Fix issues with cyclic amides. 0.6.0 2023-10-13 - Implement support for polyacenes, polyaphenes, xanthenes and similar compounds. - Implement support for amides, ethers, hydrazides, hydrazines and sulfo groups. - Fix many issues with parent chain detection. - Implement '--debug' mode in 'chemonomatopist' executable. 0.5.0 2023-06-16 - Implement support for bicyclic compounds. - Implement support for ketone analogues, nitro, nitroso and XO3 groups. - Implement partial support for guanidine. - Implement '--check' mode in 'chemonomatopist' executable. 0.4.0 2023-05-30 - Implement support for monocycles and monospiro compounds. - Implement support for some nitrogen- and sulfur-based compound classes. 0.3.0 2023-05-10 - 99.6% correctness rate for unbranched and branched saturated acyclic hydrocarbons for PubChem database downloaded on 2022-04-21. - Implement support for some compound classes having oxygen atoms. - Implement support for heteroatoms. 0.2.0 2022-05-25 - 97.5% correctness rate for unbranched and branched saturated acyclic hydrocarbons for PubChem database downloaded on 2022-04-21. 0.1.0 2022-01-18 - Initial release. ChemOnomatopist-0.6.1/LICENSE000066400000000000000000000027331452012116100156320ustar00rootroot00000000000000Copyright (c) The Regents of the University of California. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the University nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ChemOnomatopist-0.6.1/README.md000066400000000000000000000027251452012116100161050ustar00rootroot00000000000000# ChemOnomatopist *ChemOnomatopist* is a tool to derive IUPAC systematic names for chemical structures: $ echo C=C1C=CC=C1 | chemonomatopist C=C1C=CC=C1 5-methylidenecyclopenta-1,3-diene *ChemOnomatopist* analyses chemical graphs to determine IUPAC names according to the [Nomenclature of Organic Chemistry. IUPAC Recommendations and Preferred Names 2013](https://iupac.qmul.ac.uk/BlueBook/PDF/BlueBookV2.pdf), also known as the Blue Book. ## Installation *ChemOnomatopist* is written in Perl. The easiest way to install it is using [Dist::Zilla](https://metacpan.org/release/Dist-Zilla): $ git clone https://github.com/merkys/ChemOnomatopist $ dzil install ## Support This tool is under development. Support for the variety of chemical compounds is underway. Currently *ChemOnomatopist* supports: * Branched acyclic hydrocarbons * Monocycles * Monospiro compounds * Bicyclic compounds * Polyacenes * Polyaphenes * Xanthenes and similar compounds Issues known to produce names deviating from IUPAC guidelines: * Possibly incorrect seniority for *tert* substituents (Blue Book is not quite clear about them) * Incorrect addition/omission of unambiguous locants * Incorrect addition/omission of parentheses * Monocycles with multiple substituents are sometimes named incorrectly * Linear compounds with heteroatoms are usually named incorrectly ## Contributors * MiglÄ— UrbonaitÄ— ## License *ChemOnomatopist* is free software licensed under BSD-3-Clause license. ChemOnomatopist-0.6.1/bin/000077500000000000000000000000001452012116100153705ustar00rootroot00000000000000ChemOnomatopist-0.6.1/bin/chemonomatopist000077500000000000000000000044321452012116100205320ustar00rootroot00000000000000#!/usr/bin/perl use strict; use warnings; # VERSION use ChemOnomatopist; use File::Basename qw( basename ); use Getopt::Long::Descriptive; use List::Util qw( sum ); =head1 NAME chemonomatopist - derive IUPAC systematic names for chemical structures =head1 SYNOPSYS chemonomatopist [args] [files] =head1 DESCRIPTION ChemOnomatopist analyses chemical graphs to determine IUPAC names according to the "Nomenclature of Organic Chemistry. IUPAC Recommendations and Preferred Names 2013", also known as the Blue Book. =cut my $basename = basename $0; my( $opt, $usage ) = describe_options( <<"END" . 'OPTIONS', USAGE $basename [] [] DESCRIPTION $basename derives IUPAC systematic names for chemical structures END [ 'check', 'treat the input as list of SMILES and IUPAC names and check whether given and generated names match' ], [ 'cautious', 'avoid experimental features, refuse processing only partially supported compounds' ], [ 'debug', 'turn on the debug mode' ], [], [ 'help', 'print usage message and exit', { shortcircuit => 1 } ], ); if( $opt->help ) { print $usage->text; exit; } $ChemOnomatopist::CAUTIOUS = 1 if $opt->cautious; $ChemOnomatopist::DEBUG = 1 if $opt->debug; my %counts = ( OK => 0, FAIL => 0, ERROR => 0 ); local $\ = "\n"; while (<>) { chomp; my( $id, $given_name, $SMILES ); if( $opt->check ) { ( $id, $given_name, $SMILES ) = split "\t", $_; } else { $SMILES = $_; } my $derived_name; eval { $derived_name = ChemOnomatopist::get_name( $SMILES ); }; $@ =~ s/\n$// if $@; if( $opt->check ) { if( $@ ) { print $id, "\t", 'ERROR', "\t", $@; $counts{ERROR}++; } elsif( $given_name ne $derived_name ) { print $id, "\t", 'FAIL', "\t", $given_name, "\t", $derived_name; $counts{FAIL}++; } else { print $id, "\t", 'OK', "\t", $derived_name; $counts{OK}++; } } else { if( $@ ) { print STDERR $SMILES, "\t", $@; } else { print $SMILES, "\t", $derived_name; } } } if( $opt->check ) { print ''; for (reverse sort keys %counts) { print "$_:\t", $counts{$_}; } print "TOTAL:\t", sum values %counts; } ChemOnomatopist-0.6.1/container.def000066400000000000000000000010071452012116100172600ustar00rootroot00000000000000# Definition for an Apptainer/Singularity container used to check for missing dependencies. Bootstrap: docker From: debian:unstable %post apt-get update apt-get install --yes libalgorithm-combinatorics-perl cpanminus git libchemistry-opensmiles-perl libdbi-perl libdist-zilla-perl libdist-zilla-plugin-autometaresources-perl libdist-zilla-plugin-ourpkgversion-perl libgraph-perl libipc-run3-perl libset-object-perl git clone https://github.com/merkys/ChemOnomatopist cd ChemOnomatopist dzil test ChemOnomatopist-0.6.1/dist.ini000066400000000000000000000015071452012116100162670ustar00rootroot00000000000000name = ChemOnomatopist author = Andrius Merkys license = BSD copyright_holder = Andrius Merkys copyright_year = 2021-2023 version = 0.6.1 [@Filter] -bundle = @Basic -remove = License [AutoMetaResources] homepage = https://search.cpan.org/dist/%{dist} repository.github = user:merkys bugtracker.github = user:merkys [MetaJSON] [OurPkgVersion] [Prereqs] Algorithm::Combinatorics = 0 Chemistry::OpenSMILES = 0.8.6 Clone = 0 Getopt::Long::Descriptive = 0 Graph = 0.9726 Graph::MoreUtils = 0 Graph::Nauty = 0 List::Util = 0 Scalar::Util = 0 Set::Object = 0 [Prereqs / Test] -phase = test Algorithm::Combinatorics = 0 DBI = 0 IPC::Run3 = 0 Test::More = 0 ChemOnomatopist-0.6.1/lib/000077500000000000000000000000001452012116100153665ustar00rootroot00000000000000ChemOnomatopist-0.6.1/lib/ChemOnomatopist.pm000066400000000000000000002151461452012116100210460ustar00rootroot00000000000000package ChemOnomatopist; use strict; use warnings; # ABSTRACT: Give molecule a name # VERSION use Algorithm::Combinatorics qw( combinations ); use ChemOnomatopist::Chain; use ChemOnomatopist::Chain::Amide; use ChemOnomatopist::Chain::Amine; use ChemOnomatopist::Chain::Bicycle; use ChemOnomatopist::Chain::Carboxamide; use ChemOnomatopist::Chain::Circular; use ChemOnomatopist::Chain::FromHalves; use ChemOnomatopist::Chain::Imino; use ChemOnomatopist::Chain::Monocycle; use ChemOnomatopist::Chain::Monospiro; use ChemOnomatopist::Chain::Phenanthrene; use ChemOnomatopist::Chain::Polyacene; use ChemOnomatopist::Chain::Polyaphene; use ChemOnomatopist::Chain::Porphyrin; use ChemOnomatopist::Chain::Xanthene; use ChemOnomatopist::ChainHalf; use ChemOnomatopist::Elements qw( %elements ); use ChemOnomatopist::Group; use ChemOnomatopist::Group::AcylHalide; use ChemOnomatopist::Group::Aldehyde; use ChemOnomatopist::Group::Amide; use ChemOnomatopist::Group::Amine; use ChemOnomatopist::Group::Carboxyl; use ChemOnomatopist::Group::Cyanide; use ChemOnomatopist::Group::Ester; use ChemOnomatopist::Group::Ether; use ChemOnomatopist::Group::Guanidine; use ChemOnomatopist::Group::Hydrazide; use ChemOnomatopist::Group::Hydrazine; use ChemOnomatopist::Group::Hydroperoxide; use ChemOnomatopist::Group::Hydroxy; use ChemOnomatopist::Group::Imino; use ChemOnomatopist::Group::Ketone; use ChemOnomatopist::Group::Nitro; use ChemOnomatopist::Group::Nitroso; use ChemOnomatopist::Group::SulfinicAcid; use ChemOnomatopist::Group::Sulfinyl; use ChemOnomatopist::Group::SulfonicAcid; use ChemOnomatopist::Group::Sulfonyl; use ChemOnomatopist::Group::XO3; use ChemOnomatopist::MolecularGraph; use ChemOnomatopist::Name; use ChemOnomatopist::Name::Part::AlkaneANSuffix; use ChemOnomatopist::Name::Part::Stem; use ChemOnomatopist::Util qw( copy zip ); use ChemOnomatopist::Util::Graph qw( BFS_calculate_chain_length BFS_is_chain_branched cyclic_components graph_center graph_cycle_core graph_cycles graph_has_cycle graph_longest_paths_from_vertex graph_path_between_vertices graph_replace graph_without_edge_attributes subgraph tree_branch_positions tree_number_of_branches ); use ChemOnomatopist::Util::SMILES qw( cycle_SMILES_explicit ); use Chemistry::OpenSMILES qw( %bond_symbol_to_order is_double_bond is_single_bond is_triple_bond ); use Graph::MoreUtils qw( SSSR ); use Graph::Nauty qw( are_isomorphic ); use Graph::Traversal::DFS; use Graph::Undirected; use List::Util qw( all any first max min pairs sum0 uniq ); use Scalar::Util qw( blessed ); use Set::Object qw( set ); no warnings 'recursion'; our $CAUTIOUS = ''; our $DEBUG = ''; sub get_name { my( $what ) = @_; # Detect the type of the input data my( $graph ); if( blessed $what && $what->isa( Graph::Undirected:: ) ) { $graph = ChemOnomatopist::MolecularGraph->new( $what ); } else { # Assume SMILES string die "cannot handle stereochemistry now\n" if $what =~ /[\\\/@]/; require Chemistry::OpenSMILES::Parser; my $parser = Chemistry::OpenSMILES::Parser->new; my @graphs = map { ChemOnomatopist::MolecularGraph->new( $_ ) } $parser->parse( $what ); die "separate molecular entities are not handled yet\n" if @graphs > 1; $graph = shift @graphs; } die "nothing supplied for get_name()\n" unless $graph; if( any { $_ ne 'H' && !exists $elements{$_} } map { ucfirst $_->{symbol} } $graph->vertices ) { die "unknown elements detected\n"; } find_groups( $graph ); my $main_chain = select_mainchain( $graph ); return get_mainchain_name( $graph, $main_chain ); } # get_sidechain_name() receives a graph and a position to start the chain in it. # From that position it finds the longest chain and returns the constructed name. sub get_sidechain_name { my( $graph, $parent, $start ) = @_; # Record the type of parent bond my $parent_bond = '-' if $parent; if( blessed $start && $start->isa( ChemOnomatopist::Chain:: ) ) { my $attachment_point = first { $graph->has_edge( $parent, $_ ) } $start->vertices; if( $attachment_point && $graph->has_edge_attribute( $parent, $attachment_point, 'bond' ) ) { $parent_bond = $graph->get_edge_attribute( $parent, $attachment_point, 'bond' ); } } else { if( $graph->has_edge_attribute( $parent, $start, 'bond' ) ) { $parent_bond = $graph->get_edge_attribute( $parent, $start, 'bond' ); } } # Groups that cannot be included in the chain do not matter my $branches_at_start = grep { !blessed $_ || $_->is_carbon } grep { !$parent || $_ != $parent } $graph->neighbours( $start ); my $chain; if( blessed $start && $start->isa( ChemOnomatopist::Chain:: ) ) { $chain = $start; $chain->parent( $parent ) if $parent; } elsif( $graph->groups( $start ) ) { ( $chain ) = $graph->groups( $start ); $chain->parent( $parent ) if $parent; } else { $chain = select_sidechain( $graph, $parent, $start ); } my @chain = $chain->vertices; # Handle non-carbon substituents if( @chain == 1 && $graph->degree( @chain ) == 0 + defined $parent && !blessed $chain[0] && !is_element( $chain[0], 'C' ) && exists $elements{$chain[0]->{symbol}} ) { my $element = $elements{$chain[0]->{symbol}}->{prefix}; $element =~ s/a$/o/; # TODO: Is this a general rule? BBv2 seems silent. return ChemOnomatopist::Name::Part::Element->new( $element )->to_name; } # Collect heteroatoms my %heteroatoms; for (pairs zip $chain->heteroatoms, $chain->heteroatom_positions) { my( $element, $i ) = @$_; push @{$heteroatoms{$element}}, $i; } # Examine the attachments to the main chain: delete the edges # connecting them to the main chain, at the same time giving them # names according to their lengths via calls to get_sidechain_name() my %attachments; my %attachment_objects; my %isotopes; for my $i (0..$#chain) { my $atom = $chain[$i]; if( !blessed $atom && exists $atom->{isotope} ) { push @{$isotopes{$atom->{isotope} . element( $atom )}}, $i; } if( exists $atom->{h_isotope} && grep { defined $_ } @{$atom->{h_isotope}} ) { for (grep { defined $_ } @{$atom->{h_isotope}}) { push @{$isotopes{$_ . 'H'}}, $i; } } for my $neighbour ($graph->neighbours( $atom )) { next if any { $neighbour == $_ } @chain; # Skip atoms from this chain next if $parent && $neighbour == $parent; my $attachment_name = get_sidechain_name( $graph, $atom, $neighbour ); push @{$attachments{$attachment_name}}, $i; $attachment_objects{$attachment_name} = $attachment_name; } } # Collecting names of all the attachments my $name = ChemOnomatopist::Name->new; for my $attachment_name (sort { $a cmp $b } keys %attachments) { my $attachment = $attachment_objects{$attachment_name}; if( $chain->needs_substituent_locants ) { $name->append_locants( $chain->locants( @{$attachments{$attachment_name}} ) ); } if( @{$attachments{$attachment_name}} > 1 ) { my $number = IUPAC_numerical_multiplier( scalar @{$attachments{$attachment_name}} ); $number .= 'a' unless $number =~ /^(|\?|.*i)$/; $name->append_multiplier( $number ); # FIXME: More rules from BBv2 P-16.3.4 should be added if( $attachment->has_substituent_locant || # BBv2 P-16.3.4 (a) $attachment->starts_with_multiplier || # BBv2 P-16.3.4 (c) $attachment =~ /^dec/ || # BBv2 P-16.3.4 (d) $attachment =~ /^[0-9]/ ) { $attachment->bracket; } } else { if( $chain->needs_substituent_locants && !$attachment->is_enclosed && (!$attachment->is_simple || $attachment->starts_with_locant) ) { $attachment->bracket; } } $name .= $attachment; } # Collecting names of all heteroatoms for my $element (sort { $elements{$a}->{seniority} <=> $elements{$b}->{seniority} } keys %heteroatoms) { if( $chain->needs_heteroatom_locants ) { $name->append_locants( $chain->locants( @{$heteroatoms{$element}} ) ); } if( $chain->needs_heteroatom_names ) { if( @{$heteroatoms{$element}} > 1 ) { my $number = IUPAC_numerical_multiplier( scalar @{$heteroatoms{$element}} ); $number .= 'a' unless $number =~ /^(|\?|.*i)$/; $name .= $number; } if( $element eq 'S' ) { $name->append_element( 'sulfan' ); } else { $name->append_element( $elements{$element}->{prefix} ); } } } # Attaching isotopes my $isotopes = ''; for my $isotope (sort { cmp_isotopes( $a, $b ) } keys %isotopes) { if( $chain->needs_substituent_locants ) { # FIXME: Is this right? $isotopes .= join( ',', $chain->locants( @{$isotopes{$isotope}} ) ) . '-'; } $isotopes .= $isotope; $isotopes .= scalar @{$isotopes{$isotope}} if @{$isotopes{$isotope}} > 1 || $isotope =~ /H$/; } $name .= "($isotopes)" if $isotopes ne ''; if( $chain->isa( ChemOnomatopist::Chain::Circular:: ) || $chain->isa( ChemOnomatopist::Group:: ) ) { $chain->parent( $parent ) if $chain->can( 'parent' ); my $prefix = $chain->prefix; # All groups are most likely stems $prefix = ChemOnomatopist::Name::Part::Stem->new( $prefix )->to_name unless blessed $prefix; $name .= $prefix; } elsif( @chain == 1 && blessed $chain[0] && !$chain->isa( ChemOnomatopist::Chain::Ether:: ) ) { $chain[0]->parent( $parent ) if $chain[0]->can( 'parent' ); my $prefix = $chain[0]->prefix; # All group-containing chains are most likely stems $prefix = ChemOnomatopist::Name::Part::Stem->new( $prefix )->to_name unless blessed $prefix; if( $chain[0]->isa( ChemOnomatopist::Group::Sulfinyl:: ) ) { # BBv2 P-63.6 $name->[-1] =~ s/yl$/ane/; } $name .= $prefix; } else { if( $chain->isa( ChemOnomatopist::Chain::Ether:: ) && $name->has_locant ) { $name->bracket; } $chain->parent( $parent ) if $chain->can( 'parent' ); $name .= $chain->prefix; pop @$name if $name->[-1] eq 'e'; # FIXME: Dirty pop @$name if $name->[-1] eq 'an'; if( $branches_at_start > 1 ) { my( $branch_point ) = grep { $chain[$_] == $start } 0..$#chain; if( $branch_point || !$chain->is_saturated ) { # According to BBv2 P-29.2 (1) $name .= 'an' unless $name =~ /-(di|tri)?en$/; # FIXME: Dirty $name->append_substituent_locant( $branch_point + 1 ); } } $name .= 'yl' unless $name =~ /[oy]$/; $name->bracket if $name =~ /hydroxymethyl$/; # FIXME: Dirty } if( $chain->needs_multiple_bond_suffix ) { $name .= 'idene' if $parent_bond && $parent_bond eq '='; $name .= 'idyne' if $parent_bond && $parent_bond eq '#'; } $name = ChemOnomatopist::Name->new( 'acetyl' ) if $name eq '1-oxoethyl'; $name = ChemOnomatopist::Name->new( 'benzyl' ) if $name eq 'phenylmethyl'; # Detecting anilino- group if( @$name >= 2 && $name->[-2] eq 'phenyl' && $name->[-1] eq 'amino' ) { pop @$name; pop @$name; $name .= 'anilino'; } return $name; } sub get_mainchain_name { my( $graph, $chain ) = @_; my @vertices = $graph->vertices; my @chain = $chain->vertices; my @groups = most_senior_groups( $graph ); my $most_senior_group = blessed $groups[0] if @groups; # The following condition adjusts the seniority order by moving ethers below cycles if( $most_senior_group && $most_senior_group eq ChemOnomatopist::Group::Ether:: && $chain->isa( ChemOnomatopist::Chain::Circular:: ) ) { @groups = ( $chain ); $most_senior_group = blessed $chain; } # Collect the heteroatoms and isotopes in the chain my %heteroatoms; for (pairs zip $chain->heteroatoms, $chain->heteroatom_positions) { my( $element, $i ) = @$_; push @{$heteroatoms{$element}}, $i; } my %isotopes; for my $i (0..$#chain) { my $atom = $chain[$i]; next if blessed $atom; if( exists $atom->{isotope} ) { push @{$isotopes{$atom->{isotope} . element( $atom )}}, $i; } if( exists $atom->{h_isotope} && grep { defined $_ } @{$atom->{h_isotope}} ) { for (grep { defined $_ } @{$atom->{h_isotope}}) { push @{$isotopes{$_ . 'H'}}, $i; } } } # Collect the substituents my %attachments; my %attachment_objects; my @senior_group_attachments; for my $i (0..$#chain) { my $atom = $chain[$i]; for my $neighbour ($graph->neighbours( $atom )) { next if any { $_ == $neighbour } @chain; # Skip atoms from this chain my( $group ) = $graph->groups( $neighbour ); if( grep { $_ == $neighbour } @groups ) { push @senior_group_attachments, $i; } else { my $attachment_name = get_sidechain_name( $graph, $atom, $group ? $group : $neighbour ); push @{$attachments{$attachment_name}}, $i; $attachment_objects{$attachment_name} = $attachment_name; } } } # Collecting names of all the attachments my @order = sort { cmp_only_aphabetical( $a, $b ) || $a cmp $b } keys %attachments; my $name = ChemOnomatopist::Name->new; for my $i (0..$#order) { my $attachment_name = $order[$i]; my $attachment = $attachment_objects{$attachment_name}; if( $chain->needs_substituent_locants ) { $name->append_locants( $chain->locants( @{$attachments{$attachment_name}} ) ); } # FIXME: More rules from BBv2 P-16.3.4 and P-16.5.1 should be added if( !$attachment->is_enclosed && ( $attachment->starts_with_multiplier || # BBv2 P-16.3.4 (c) $attachment =~ /^[0-9]/ ) ) { $attachment->bracket; } if( @{$attachments{$attachment_name}} > 1 ) { my $number; if( $attachment->is_enclosed ) { $number = IUPAC_complex_numerical_multiplier( scalar @{$attachments{$attachment_name}} ); } else { $number = IUPAC_numerical_multiplier( scalar @{$attachments{$attachment_name}} ); $number .= 'a' unless $number =~ /^(|\?|.*i)$/; } $name .= $number; # BBv2 P-16.3.4 (a) if( !$attachment->is_enclosed && ( $attachment =~ /^dec/ || # BBv2 P-16.3.4 (d) $attachment->has_substituent_locant ) ) { $attachment->bracket; } } else { # This is an attempt to implement rules from P-16.5.1. # However, they are quite vague, thus there is not much of guarantee the following code is correct. if( !$attachment->is_enclosed && ($attachment->has_locant || !$attachment->is_simple) && $chain->needs_substituent_locants && $attachment ne 'tert-butyl' ) { $attachment->bracket; } } $name .= $attachment; } # Collecting names of all heteroatoms for my $element (sort { $elements{$a}->{seniority} <=> $elements{$b}->{seniority} } keys %heteroatoms) { if( $chain->needs_heteroatom_locants ) { $name->append_locants( $chain->locants( @{$heteroatoms{$element}} ) ); } if( $chain->needs_heteroatom_names ) { if( @{$heteroatoms{$element}} > 1 ) { my $number = IUPAC_numerical_multiplier( scalar @{$heteroatoms{$element}} ); $number .= 'a' unless $number =~ /^(|\?|.*i)$/; $name .= $number; } $name->append_element( $elements{$element}->{prefix} ); } } # Attaching isotopes my $isotopes = ''; for my $isotope (sort { cmp_isotopes( $a, $b ) } keys %isotopes) { if( $chain->needs_substituent_locants ) { # FIXME: Is this right? $isotopes .= join( ',', $chain->locants( @{$isotopes{$isotope}} ) ) . '-'; } $isotopes .= $isotope; $isotopes .= scalar @{$isotopes{$isotope}} if @{$isotopes{$isotope}} > 1 || $isotope =~ /H$/; } $name .= "($isotopes)" if $isotopes ne ''; $name .= $chain->suffix; if( $most_senior_group && !$most_senior_group->isa( ChemOnomatopist::Chain:: ) ) { if( $groups[0]->is_carbon ) { # Most senior group is carbon, thus it is in the chain as well my @senior_group_positions = grep { blessed $chain[$_] && $chain[$_]->isa( $most_senior_group ) } 0..$#chain; @senior_group_attachments = sort { $a <=> $b } @senior_group_attachments, @senior_group_positions; } my $number = IUPAC_numerical_multiplier( scalar @senior_group_attachments ); $number = '' if $number eq 'mono'; $number .= 'a' unless $number =~ /^(|\?|.*i)$/; # Terminal locants are not cited for 1 or 2 senior group attachments according to BBv2 P-14.3.4.1 if( $chain->needs_suffix_locant ) { $name->append_locants( $chain->locants( @senior_group_attachments ) ); } $name->append_multiplier( $number ); if( $chain->isa( ChemOnomatopist::Chain::Circular:: ) ) { $name->append_suffix( $groups[0]->suffix_if_cycle_substituent ); } elsif( @senior_group_attachments > 2 ) { $name->append_suffix( $groups[0]->multisuffix ); } else { $name->append_suffix( $groups[0]->suffix ); } } $name =~ s/benzen(-1-)?ol$/phenol/; $name = 'anisole' if $name eq 'methoxybenzene'; # BBv2 P-63.2.4.1 $name = 'benzoic acid' if $name eq 'benzenecarboxylic acid'; # BBv2 P-65.1.1.1 $name = 'toluene' if $name eq 'methylbenzene'; $name =~ s/^(\d,\d-)dimethylbenzene$/$1xylene/; $name = 'formic acid' if $name eq 'methanoic acid'; $name =~ s/ethanoic acid$/acetic acid/; # BBv2 P-65.1.1.1 $name =~ s/benzen(-1-)?amine$/aniline/; return $name; } sub find_groups { my( $graph ) = @_; # Attaching hydrogen atoms to their heavier neighbours for my $H (grep { is_element( $_, 'H' ) } $graph->vertices) { die "cannot handle shared hydrogen atoms\n" if $graph->degree( $H ) > 1; my( $parent ) = $graph->neighbours( $H ); die "cannot handle compounds with H-H bonds\n" if is_element( $parent, 'H' ); $parent->{hcount} = 0 unless exists $parent->{hcount}; $parent->{hcount}++; push @{$parent->{h_isotope}}, $H->{isotope}; $graph->delete_vertex( $H ); } # Detecting cyclic compounds my @ring_systems = cyclic_components( $graph ); # Aromatising mancudes - experimental for my $core (@ring_systems) { my %valences; for my $edge ($core->edges) { my $order = 1; if( $core->has_edge_attribute( @$edge, 'bond' ) ) { $order = $bond_symbol_to_order{$core->get_edge_attribute( @$edge, 'bond' )}; } $valences{$edge->[0]} += $order; $valences{$edge->[1]} += $order; } my %uniq = map { join( '', sort @$_ ) => $_ } SSSR( $core, 8 ); my @aromatic; for my $cycle (values %uniq) { next if any { $core->degree( $_ ) > 3 } @$cycle; my @v2 = grep { $core->degree( $_ ) == 2 } @$cycle; my @v3 = grep { $core->degree( $_ ) == 3 } @$cycle; my @nonaromatic_atoms = grep { $valences{$_} < 3 } @v2; if( @nonaromatic_atoms ) { next if @nonaromatic_atoms > 1; next unless element( $nonaromatic_atoms[0] ) =~ /^[NOP]$/; } push @aromatic, [ Graph::Traversal::DFS->new( subgraph( $core, @$cycle ) )->dfs ]; } for my $cycle (@aromatic) { my @vertices = @$cycle; for (0..$#vertices) { $graph->set_edge_attribute( $vertices[$_], $vertices[($_ + 1) % @vertices], 'bond', ':' ); if( $vertices[$_]->{symbol} =~ /^(Se|As|[BCNOPS])$/ ) { $vertices[$_]->{symbol} = lcfirst $vertices[$_]->{symbol}; } } } } for my $core (@ring_systems) { my %vertices_by_degree; for my $vertex ($core->vertices) { my $degree = $core->degree( $vertex ); $vertices_by_degree{$degree} = [] unless $vertices_by_degree{$degree}; push @{$vertices_by_degree{$degree}}, $vertex; } my $compound; if( join( ',', sort keys %vertices_by_degree ) eq '2' ) { # Monocycles $compound = ChemOnomatopist::Chain::Monocycle->new( $graph, Graph::Traversal::DFS->new( $core )->dfs ); } elsif( ChemOnomatopist::Chain::Monospiro->has_form( $core ) ) { # BBv2 P-24.2.1 Monospiro alicyclic ring systems $compound = ChemOnomatopist::Chain::Monospiro->new( $graph, $core->vertices ); } elsif( ChemOnomatopist::Chain::Bicycle->has_form( $core ) ) { # Ortho-fused as defined in BBv2 P-25.3.1.1.1 $compound = ChemOnomatopist::Chain::Bicycle->new( $graph, $core->vertices ); } elsif( ChemOnomatopist::Chain::Porphyrin->has_form( $core ) ) { # Porphyrin $compound = ChemOnomatopist::Chain::Porphyrin->new( $graph, $core->vertices ); } elsif( join( ',', sort keys %vertices_by_degree ) eq '2,3' ) { # Fused ring systems of three or more rings # Graph::MoreUtils::SSSR v0.1.0 does not know how to return unique rings my %uniq = map { join( '', sort @$_ ) => $_ } SSSR( $core, 8 ); my @cycles = map { ChemOnomatopist::Chain::Monocycle->new( copy $graph, Graph::Traversal::DFS->new( $_ )->dfs ) } map { subgraph( $core, @$_ ) } values %uniq; if( (grep { $_->is_benzene } @cycles) == 2 && (grep { !$_->is_benzene } @cycles) == 1 && (all { $_->length == 6 } @cycles) && (any { !$_->is_hydrocarbon } @cycles) && are_isomorphic( graph_without_edge_attributes( $core ), ChemOnomatopist::Chain::Xanthene->ideal_graph, sub { return 'C' } ) ) { $compound = ChemOnomatopist::Chain::Xanthene->new( $graph, @cycles ); } elsif( @cycles >= 3 && (all { $_->length == 6 && $_->is_hydrocarbon } @cycles) && ChemOnomatopist::Chain::Polyacene->has_form( $core ) ) { $compound = ChemOnomatopist::Chain::Polyacene->new( $graph, @cycles ); } elsif( @cycles == 3 && (all { $_->length == 6 } @cycles) && are_isomorphic( graph_without_edge_attributes( $core ), ChemOnomatopist::Chain::Phenanthrene->ideal_graph, sub { return 'C' } ) ) { $compound = ChemOnomatopist::Chain::Phenanthrene->new( $graph, @cycles ); } elsif( @cycles >= 4 && (all { $_->length == 6 && $_->is_hydrocarbon } @cycles) && ChemOnomatopist::Chain::Polyaphene->has_form( $core ) ) { $compound = ChemOnomatopist::Chain::Polyaphene->new( $graph, @cycles ); } else { die "cannot handle complicated cyclic compounds\n"; } } else { die "cannot handle complicated cyclic compounds\n"; } $graph->add_group( $compound ); } # First pass is to detect guanidine and hydrazine for my $atom ($graph->vertices) { next if $graph->groups( $atom ); my @neighbours = $graph->neighbours( $atom ); my @N = grep { is_element( $_, 'N' ) } @neighbours; my $H = $atom->{hcount} ? $atom->{hcount} : 0; if( is_element( $atom, 'C' ) && @neighbours == 3 && @N == 3 && !$H ) { # Detecting guanidine my $guanidine = ChemOnomatopist::Group::Guanidine->new( $graph, $atom ); $graph->add_group( $guanidine ); $graph->delete_vertex( $atom ); for (combinations( \@N, 2 )) { $graph->add_edge( @$_ ); } } if( is_element( $atom, 'N' ) && @N == 1 && (all { !$graph->groups( $_ ) } @N) && is_single_bond( $graph, $atom, @N ) ) { # Detecting hydrazine my $hydrazine = ChemOnomatopist::Group::Hydrazine->new( $graph, $atom, @N ); $graph->add_group( $hydrazine ); } } for my $atom ($graph->vertices) { next if $graph->groups( $atom ); my @neighbours = $graph->neighbours( $atom ); my @C = grep { is_element( $_, 'C' ) } @neighbours; my @N = grep { is_element( $_, 'N' ) } @neighbours; my @O = grep { is_element( $_, 'O' ) } @neighbours; my @S = grep { is_element( $_, 'S' ) } @neighbours; my @Se = grep { is_element( $_, 'Se' ) } @neighbours; my @Te = grep { is_element( $_, 'Te' ) } @neighbours; my $H = $atom->{hcount} ? $atom->{hcount} : 0; # Nitroso and its analogues if( @neighbours + $H == 2 && @C == 1 && @O == 1 && is_double_bond( $graph, $atom, @O ) && any { is_element( $atom, $_ ) } qw( Br Cl F I N ) ) { my $nitroso = ChemOnomatopist::Group::Nitroso->new( element( $atom ) ); graph_replace( $graph, $nitroso, $atom, @O ); next; } # N-based groups if( is_element( $atom, 'N' ) && @C == 1 && @O == 2 && $atom->{charge} && $atom->{charge} == 1 && (any { is_double_bond( $graph, $atom, $_ ) } @O) && (any { !is_double_bond( $graph, $atom, $_ ) && $_->{charge} && $_->{charge} == -1 } @O) ) { # Detecting nitro my $nitro = ChemOnomatopist::Group::Nitro->new; graph_replace( $graph, $nitro, $atom, @O ); } elsif( is_element( $atom, 'N' ) && @neighbours + $H == 3 ) { # Detecting amines # BBv2 P-62.3 says "amines must have three single bonds linked to at least one carbon atom" my $amine = ChemOnomatopist::Group::Amine->new; graph_replace( $graph, $amine, $atom ); } elsif( is_element( $atom, 'N' ) && @neighbours + $H == 2 && any { is_double_bond( $graph, $atom, $_ ) } @neighbours ) { # Detecting imines # BBv2 P-62.3 says "Imines must have a double bond between a carbon atom and the nitrogen." my $imine = ChemOnomatopist::Group::Imino->new; graph_replace( $graph, $imine, $atom ); } elsif( is_element( $atom, 'N' ) && @neighbours + $H == 1 && @C == 1 && $graph->degree( @C ) >= 2 && is_triple_bond( $graph, $atom, @C ) ) { # Detecting cyanide my( $C ) = grep { $_ != $atom } $graph->neighbours( @C ); my $cyanide = ChemOnomatopist::Group::Cyanide->new; graph_replace( $graph, $cyanide, $atom, @C ); } # Hydroxy groups and their chalcogen analogues if( @neighbours == 1 && $H == 1 && ( @C || @N || @O || @S || @Se || @Te ) && any { is_element( $atom, $_ ) } qw( O S Se Te ) ) { my $hydroxy = ChemOnomatopist::Group::Hydroxy->new( element( $atom ) ); graph_replace( $graph, $hydroxy, $atom ); } # Ketones and their chalcogen analogues if( @neighbours + $H == 1 && @C == 1 && is_double_bond( $graph, $atom, @C ) && any { is_element( $atom, $_ ) } qw( O S Se Te ) ) { my $ketone = ChemOnomatopist::Group::Ketone->new( element( $atom ) ); graph_replace( $graph, $ketone, $atom ); } # Ether if( is_element( $atom, 'O' ) && @neighbours + $H == 2 && @C == 2 ) { my $ether = ChemOnomatopist::Group::Ether->new; graph_replace( $graph, $ether, $atom ); } # XO3 if( @neighbours + $H == 4 && @C == 1 && @O == 3 && (all { is_double_bond( $graph, $atom, $_ ) } @O) && any { is_element( $atom, $_ ) } qw( Br Cl F I ) ) { my $XO3 = ChemOnomatopist::Group::XO3->new( element( $atom ) ); graph_replace( $graph, $XO3, $atom, @O ); } # Sulfinyl group and its analogues if( @neighbours + $H == 3 && @O == 1 && is_double_bond( $graph, $atom, @O ) && any { is_element( $atom, $_ ) } qw( S Se Te ) ) { my $sulfinyl = ChemOnomatopist::Group::Sulfinyl->new( element( $atom ) ); graph_replace( $graph, $sulfinyl, $atom, @O ); } # Sulfonyl group and its analogues if( @neighbours == 4 && @O == 2 && (all { is_double_bond( $graph, $atom, $_ ) } @O) && any { is_element( $atom, $_ ) } qw( S Se Te ) ) { my $sulfonyl = ChemOnomatopist::Group::Sulfonyl->new( element( $atom ) ); graph_replace( $graph, $sulfonyl, $atom, @O ); } } if( any { !blessed $_ && exists $_->{charge} } $graph->vertices ) { die "cannot handle charges for now\n"; } # Due to the issue in Graph, bridges() returns strings instead of real objects. # Graph issue: https://github.com/graphviz-perl/Graph/issues/29 my %vertices_by_name = map { $_ => $_ } $graph->vertices; my $cut_vertices = set( map { $vertices_by_name{$_} } map { @$_ } $graph->bridges ); # Second pass is needed to build on top of these trivial groups for my $atom ($graph->vertices) { next if $graph->groups( $atom ); my @neighbours = $graph->neighbours( $atom ); my @groups = grep { blessed $_ && $_->isa( ChemOnomatopist::Group:: ) } @neighbours; my @C = grep { is_element( $_, 'C' ) } @neighbours; my @H = grep { is_element( $_, 'H' ) } @neighbours; my @N = grep { is_element( $_, 'N' ) } @neighbours; my @O = grep { is_element( $_, 'O' ) } @neighbours; my $H = $atom->{hcount} ? $atom->{hcount} : 0; if( is_element( $atom, 'C' ) && @groups == 1 && $H == 1 && $groups[0]->isa( ChemOnomatopist::Group::Ketone:: ) ) { # Detecting aldehyde my $aldehyde = ChemOnomatopist::Group::Aldehyde->new( @groups ); $graph->delete_vertices( @groups ); $graph->add_edges( map { $aldehyde, $_ } $graph->neighbours( $atom ) ); $graph->delete_vertex( $atom ); } elsif( is_element( $atom, 'C' ) && @neighbours + $H == 3 && @groups >= 1 && @O == 2 && (any { $_->isa( ChemOnomatopist::Group::Ketone:: ) } @groups) && ( (any { $_->isa( ChemOnomatopist::Group::Hydroxy:: ) } @groups) || (all { $graph->degree( $_ ) == 1 } @O) ) ) { # Detecting carboxyl my( $parent ) = grep { !is_element( $_, 'O' ) } @neighbours; my $carboxyl = ChemOnomatopist::Group::Carboxyl->new( $parent ); $graph->delete_vertices( $atom, @O ); $graph->add_vertex( $carboxyl ); $graph->add_edges( $carboxyl, $parent ) if $parent; } elsif( is_element( $atom, 'C' ) && @groups == 1 && @C == 1 && @O == 2 && $groups[0]->isa( ChemOnomatopist::Group::Ketone:: ) && all { $cut_vertices->has( $_ ) } @O ) { # Detecting esters # Both oxygens have to be cut vertices to avoid one being in a ring $graph->delete_vertices( @groups ); my( $hydroxylic ) = grep { $_ != $atom } map { $graph->neighbours( $_ ) } grep { !blessed $_ } @O; my( $acid ) = @C; my $ester = ChemOnomatopist::Group::Ester->new( $hydroxylic, $acid ); $graph->add_edges( $ester, $hydroxylic ); $graph->add_edges( $ester, $acid ); $graph->delete_vertices( $atom, @O ); } elsif( is_element( $atom, 'C' ) && @groups == 2 && (any { $_->isa( ChemOnomatopist::Group::Amine:: ) } @groups) && (any { $_->isa( ChemOnomatopist::Group::Ketone:: ) } @groups) ) { # Detecting amides my $amide = ChemOnomatopist::Group::Amide->new( $atom ); my( $amine ) = grep { $_->isa( ChemOnomatopist::Group::Amine:: ) } @groups; my( $ketone ) = grep { $_->isa( ChemOnomatopist::Group::Ketone:: ) } @groups; $graph->delete_vertices( $ketone ); graph_replace( $graph, $amide, $amine ); } elsif( is_element( $atom, 'C' ) && @neighbours + $H == 3 && @C == 1 && @groups == 1 && $groups[0]->isa( ChemOnomatopist::Group::Ketone:: ) && is_element( @groups, 'O' ) && element( grep { !blessed $_ && !is_element( $_, 'C' ) } @neighbours ) =~ /^(F|Cl|Br|I)$/ ) { my( $halide ) = grep { !blessed $_ && !is_element( $_, 'C' ) } @neighbours; my $acyl_halide = ChemOnomatopist::Group::AcylHalide->new( $halide ); graph_replace( $graph, $acyl_halide, $atom, grep { blessed $_ || !is_element( $_, 'C' ) } @neighbours ); } elsif( is_element( $atom, 'C' ) && @N == 1 && @O == 1 && $graph->groups( @N ) && (any { $_->isa( ChemOnomatopist::Group::Hydrazine:: ) } $graph->groups( @N )) && $O[0]->isa( ChemOnomatopist::Group::Ketone:: ) ) { # Detect hydrazide my( $hydrazine ) = grep { $_->isa( ChemOnomatopist::Group::Hydrazine:: ) } $graph->groups( @N ); my @vertices = $hydrazine->vertices; @vertices = reverse @vertices if $vertices[0] == $N[0]; my $hydrazide = ChemOnomatopist::Group::Hydrazide->new( $graph, @vertices ); $graph->delete_vertices( @O ); $graph->add_group( $hydrazide ); $graph->delete_group( $hydrazine ); } if( !blessed $atom && is_element( $atom, 'N' ) && @neighbours >= 2 ) { die "cannot process secondary and tertiary amines yet\n"; } # Detecting sulfinic acids if( !blessed $atom && is_element( $atom, 'S' ) && @neighbours + $H == 3 && @C == 1 && @O == 2 && @groups == 1 && $groups[0]->isa( ChemOnomatopist::Group::Hydroxy:: ) && is_double_bond( $graph, $atom, grep { !blessed $_ } @O ) ) { my $acid = ChemOnomatopist::Group::SulfinicAcid->new( @C ); graph_replace( $graph, $acid, $atom, @O ); } # Detecting sulfonic acids if( !blessed $atom && is_element( $atom, 'S' ) && @neighbours + $H == 4 && @C == 1 && @O == 3 && @groups == 1 && $groups[0]->isa( ChemOnomatopist::Group::Hydroxy:: ) && all { is_double_bond( $graph, $atom, $_ ) } grep { !blessed $_ } @O ) { my $acid = ChemOnomatopist::Group::SulfonicAcid->new( @C ); graph_replace( $graph, $acid, $atom, @O ); } if( !blessed $atom && @C == 1 && @groups == 1 && ( is_element( $atom, 'O' ) || is_element( $atom, 'S' ) || is_element( $atom, 'Se' ) || is_element( $atom, 'Te' ) ) && $groups[0]->isa( ChemOnomatopist::Group::Hydroxy:: ) ) { my $hydroperoxide = ChemOnomatopist::Group::Hydroperoxide->new( $atom, @groups ); graph_replace( $graph, $hydroperoxide, $atom, @groups ); } } # Detecting amides attached to cyclic chains for my $atom ($graph->vertices) { next if blessed $atom; next unless element( $atom ) eq 'C'; next unless $graph->degree( $atom ) == 2; next if $graph->groups( $atom ); my @neighbours = $graph->neighbours( $atom ); my $amide = first { blessed $_ && $_->isa( ChemOnomatopist::Group::Amide:: ) } @neighbours; next unless $amide; next unless $amide->{parent} == $atom; my $ring_atom = first { $_ != $amide } @neighbours; my( $cycle ) = $graph->groups( $ring_atom ); next unless $cycle; next unless $cycle->isa( ChemOnomatopist::Chain::Monocycle:: ); $graph->delete_group( $cycle ); $graph->add_group( ChemOnomatopist::Chain::Carboxamide->new( $graph, $amide, $atom, $cycle ) ); } # Safeguarding against multiple cycle amides sharing the same amido group. # Otherwise this may lead to endless loops. my @amides_in_carboxamides = map { $_->{amide} } grep { $_->isa( ChemOnomatopist::Chain::Carboxamide:: ) } $graph->groups; if( @amides_in_carboxamides > uniq @amides_in_carboxamides ) { die "cannot process multiple cycle amides sharing the same amide group\n"; } return; } # Derive the chemical element of atom or group representation # TODO: Replace object methods is_... sub element { my( $atom_or_group ) = @_; return undef unless ref $atom_or_group; if( !blessed $atom_or_group ) { die "unknown value '$atom_or_group' given for element()\n" unless ref $atom_or_group eq 'HASH'; return ucfirst $atom_or_group->{symbol}; } if( $atom_or_group->isa( 'Chemistry::Atom' ) ) { # PerlMol Atom return $atom_or_group->symbol; } return $atom_or_group->element; } # Check if an object or Perl hash is of certain chemical element sub is_element { my( $atom, $element ) = @_; return unless ref $atom; $element = ucfirst $element; if( blessed $atom ) { return element( $atom ) && element( $atom ) eq $element; } return ref $atom eq 'HASH' && exists $atom->{symbol} && ucfirst $atom->{symbol} eq $element; } # Given a graph, selects the main chain. # The returned chain is an object of ChemOnomatopist::Chain or its subclasses. # The selection is implemented according to P-41 sub select_mainchain { my( $graph ) = @_; my @POI; # Points of interest # POIs are atoms connected to the most senior groups, if any my @groups = most_senior_groups( $graph ); my $most_senior_group = blessed $groups[0] if @groups; if( $most_senior_group && $most_senior_group eq ChemOnomatopist::Group::Ether:: ) { $most_senior_group = undef; @groups = (); } for my $group (@groups) { if( $group->is_part_of_chain ) { push @POI, $group; } elsif( $group->isa( ChemOnomatopist::Group::Amide:: ) ) { push @POI, $group->{parent}; } elsif( $group->isa( ChemOnomatopist::Group::Imino:: ) ) { push @POI, grep { is_double_bond( $graph, $group, $_ ) } $graph->neighbours( $group ); } else { push @POI, $graph->neighbours( $group ); } } @POI = uniq @POI; # FIXME: Actually, more than one group can be attached to the same vertex # "Classes denoted by the senior atom in heterane nomenclature" if( !@POI ) { my $elements = set( map { element( $_ ) } grep { !blessed $_ || !$_->isa( ChemOnomatopist::Group::Ether:: ) } # Ethers are less senior grep { !blessed $_ || !$_->is_prefix_only } # Prefix-only groups cannot act as main chains $graph->vertices ); my( $most_senior_element ) = grep { $elements->has( $_ ) } qw( N P As Sb Bi Si Ge Sn Pb B Al Ga In Tl O S Se Te ); # C removed intentionally if( $most_senior_element ) { @POI = grep { defined element( $_ ) && element( $_ ) eq $most_senior_element } $graph->vertices; } } # "40. Carbon compounds: rings, chains" # The remaining cycles will be homocycles if( !@POI ) { @POI = grep { $_->isa( ChemOnomatopist::Chain::Circular:: ) } $graph->groups; } # "41. Ethers, then sulfides, sulfoxides, sulfones; then selenides, selenoxides, etc." if( !@POI ) { @groups = most_senior_groups( grep { $_->isa( ChemOnomatopist::Group::Ether:: ) } grep { blessed $_ } $graph->vertices ); @POI = @groups; $most_senior_group = blessed $groups[0] if @groups; } print STDERR ">>> most senior functional group: $most_senior_group\n" if $DEBUG; my @parents = @POI; my @chains; if( @parents ) { # Select a chain containing most POIs # Prefer circular structures if( map { $graph->groups( $_ ) } @parents ) { @parents = uniq map { $graph->groups( $_ ) } @parents; } if( all { blessed $_ && $_->isa( ChemOnomatopist::Chain:: ) } @parents ) { @chains = map { $_->can( 'candidates' ) ? $_->candidates : $_ } @parents; } elsif( $most_senior_group && $most_senior_group->isa( ChemOnomatopist::Chain:: ) ) { @chains = map { $_->can( 'candidates' ) ? $_->candidates : $_ } @groups; } elsif( @parents == 1 ) { if( blessed $parents[0] && $parents[0]->can( 'candidates' ) ) { @chains = $parents[0]->candidates; } else { # As the starting position is known, it is enough to take the "sidechain" # containing this particular parent: my $chain = select_sidechain( $graph, (blessed $groups[0] && $groups[0]->is_terminal ? @groups : undef), @parents ); my @vertices = $chain->vertices; push @chains, ChemOnomatopist::Chain->new( $graph, undef, @vertices ); push @chains, ChemOnomatopist::Chain->new( $graph, undef, reverse @vertices ) if @vertices > 1; } } elsif( @parents ) { my $copy = $graph->copy; $copy->delete_vertices( map { $_->vertices } $copy->groups ); $copy->delete_vertices( grep { blessed $_ && !$_->is_part_of_chain } $copy->vertices ); my @paths; my $max_value; for my $i (0..$#parents) { for my $j (($i+1)..$#parents) { my @path = graph_path_between_vertices( $copy, $parents[$i], $parents[$j] ); next unless @path; my $value = (set( @parents ) * set( @path ))->size; if( !defined $max_value || $max_value < $value ) { @paths = ( \@path ); $max_value = $value; } elsif( $max_value == $value ) { push @paths, \@path; } } } # Maybe there is no path between any given pair of POIs. # If so, single-POI paths can be made. @paths = map { [ $_, $_ ] } @parents unless @paths; # Construct all chains having all possible extensions to both sides of the selected path my %longest_paths; for my $path (@paths) { my $copy = copy $graph; $copy->delete_path( @$path ); $copy->delete_vertices( grep { !is_element( $_, 'C' ) && !$_->is_part_of_chain } grep { blessed $_ } $copy->vertices ); my $A = shift @$path; my $B = pop @$path; if( !exists $longest_paths{$A} ) { $longest_paths{$A} = [ graph_longest_paths_from_vertex( $copy, $A ) ]; } if( !exists $longest_paths{$B} ) { $longest_paths{$B} = [ graph_longest_paths_from_vertex( $copy, $B ) ]; } for my $i (0..$#{$longest_paths{$A}}) { for my $j (0..$#{$longest_paths{$B}}) { if( $A == $B ) { if( $i == $j ) { push @chains, ChemOnomatopist::Chain->new( $graph, undef, @{$longest_paths{$A}->[$i]} ); next; } elsif( $longest_paths{$A}->[$i][1] == $longest_paths{$A}->[$j][1] ) { next; } # CHECKME: Maybe we are getting too many combinations? } push @chains, ChemOnomatopist::Chain->new( $graph, undef, reverse( @{$longest_paths{$A}->[$i]} ), @$path, @{$longest_paths{$B}->[$j]} ), ChemOnomatopist::Chain->new( $graph, undef, reverse( @{$longest_paths{$B}->[$j]} ), reverse( @$path ), @{$longest_paths{$A}->[$j]} ); } } } die "cannot determine the parent structure\n" unless @chains; # FIXME: This should probably be replaced by "most POIs" @chains = rule_most_groups( $most_senior_group, @chains ) if $most_senior_group; } elsif( @groups ) { # Attempt to build chains from functional groups @chains = map { ChemOnomatopist::Chain->new( $graph, undef, $_ ) } @groups; } else { die "cannot determine the parent structure\n"; } } elsif( $graph->groups ) { @chains = map { $_->can( 'candidates' ) ? $_->candidates : $_ } $graph->groups; # FIXME: This is a hack } else { # Here the candidate halves for the longest (and "best") path are placed in @path_parts. # Each of candidate halves start with center atom. my $subgraph = copy $graph; $subgraph->delete_vertices( grep { blessed $_ && $_->isa( ChemOnomatopist::Group:: ) && !$_->is_part_of_chain } $subgraph->vertices ); my @center = graph_center( $subgraph ); my @path_parts; if( @center == 1 ) { # Longest path has odd length for my $path ( graph_longest_paths_from_vertex( $subgraph, $center[0] ) ) { push @path_parts, ChemOnomatopist::ChainHalf->new( $graph, undef, @$path ); } } else { # Longest path has even length # Graph copy without center edge is required by graph_longest_paths_from_vertex() my $copy = copy $subgraph; $copy->delete_edge( @center ); for my $vertex ( @center ) { push @path_parts, map { ChemOnomatopist::ChainHalf->new( $graph, (grep { $_ ne $vertex } @center), @$_ ) } graph_longest_paths_from_vertex( $copy, $vertex ); } } return shift @path_parts if @path_parts == 1; # methane # Generate all possible chains. # FIXME: This needs optimisation. for my $part1 (@path_parts) { for my $part2 (@path_parts) { next if $part1->group eq $part2->group; push @chains, ChemOnomatopist::Chain::FromHalves->new( $part1, $part2 ); } } } die "cannot determine the parent structure\n" unless @chains; my $chain = filter_chains( @chains ); my @vertices = $chain->vertices; # Recognising amide chains if( $most_senior_group && $most_senior_group eq ChemOnomatopist::Group::Amide:: && !$chain->isa( ChemOnomatopist::Chain::Carboxamide:: ) ) { $chain = ChemOnomatopist::Chain::Amide->new( $graph, $chain, @groups ); } # Recognising amine chains if( $most_senior_group && $most_senior_group eq ChemOnomatopist::Group::Amine:: ) { $chain = ChemOnomatopist::Chain::Amine->new( $graph, $chain, @groups ); } # Recognising imino chains if( $most_senior_group && $most_senior_group eq ChemOnomatopist::Group::Imino:: ) { $chain = ChemOnomatopist::Chain::Imino->new( $graph, $chain, @groups ); } # This is needed to detect ethers. # However, it clears the cache of chains, thus is quite suboptimal. if( blessed $chain eq ChemOnomatopist::Chain::FromHalves:: ) { $chain = ChemOnomatopist::Chain->new( $graph, undef, @vertices ); } # Replace the original chain with the selected candidate # TODO: This code is either dead or unused, remove if( $chain->isa( ChemOnomatopist::Group:: ) && $chain->candidate_for ) { graph_replace( $graph, $chain, $chain->candidate_for ); } # If there is at least one of carbon-based senior group attachment, # it means both ends are already senior, prompting to follow the # exception of three or more carbon-based groups. if( $most_senior_group && $groups[0]->is_carbon && !$chain->isa( ChemOnomatopist::Chain::Circular:: ) && $chain->number_of_groups( $most_senior_group ) ) { shift @vertices; pop @vertices; $chain = ChemOnomatopist::Chain->new( $graph, undef, @vertices ); } print STDERR ">>> mainchain: $chain (length = " . $chain->length . ")\n" if $DEBUG; return $chain; } # Selects the best side chain sub select_sidechain { my( $graph, $parent, $start ) = @_; # Do this for non-carbons for now in order to represent attachments if( !is_element( $start, 'C' ) && $graph->degree( $start ) == 0 + defined $parent ) { return ChemOnomatopist::Chain->new( $graph, $parent, $start ); } my $C_graph = copy $graph; $C_graph->delete_edge( $start, $parent ) if $parent; if( $graph->degree( $start ) == 1 + defined $parent && grep { element( $start ) && element( $start ) eq $_ } qw( S Se Te ) ) { # Chalcogen analogues of ethers $C_graph->delete_vertices( grep { !is_element( $_, element( $start ) ) } $C_graph->vertices ); } else { # Delete non-carbon leaves $C_graph->delete_vertices( grep { $_ != $start && !is_element( $_, 'C' ) && $C_graph->degree( $_ ) == 1 } $C_graph->vertices ); } # Delete formed chains $C_graph->delete_vertices( grep { $_ != $start } map { $_->vertices } $C_graph->groups ); # FIXME: Ad-hoc, but works... $C_graph->delete_vertices( grep { blessed $_ && $_->isa( ChemOnomatopist::Group::Amine:: ) } $C_graph->vertices ); return ChemOnomatopist::Chain->new( $graph, $parent, $start ) unless $C_graph->degree( $start ); my @path_parts; for my $neighbour ($C_graph->neighbours( $start )) { my $graph_copy = copy $C_graph; $graph_copy->delete_edge( $start, $neighbour ); for my $path ( graph_longest_paths_from_vertex( $graph_copy, $neighbour ) ) { push @path_parts, ChemOnomatopist::ChainHalf->new( $graph, undef, $start, @$path ); } } my @chains; if( $C_graph->degree( $start ) > 1 && (!blessed $start || !$start->is_terminal) ) { # FIXME: Deduplicate: copied from select_mainchain() # Generate all possible chains. # FIXME: This needs optimisation. for my $part1 (@path_parts) { for my $part2 (@path_parts) { next if $part1->group eq $part2->group; push @chains, ChemOnomatopist::Chain::FromHalves->new( $part1, $part2 ); } } } else { @chains = map { ChemOnomatopist::Chain->new( $graph, $parent, $_->vertices ) } @path_parts; } die "cannot select a sidechain\n" unless @chains; # From BBv2 P-29.2 my $rule_lowest_free_valence = sub { my( @chains ) = @_; my @chains_now; my $lowest_locant; for my $chain (@chains) { my @vertices = $chain->vertices; my( $locant ) = grep { $vertices[$_] == $start } 0..$#vertices; if( @chains_now ) { if( $lowest_locant > $locant ) { @chains_now = ( $chain ); $lowest_locant = $locant; } elsif( $lowest_locant == $locant ) { push @chains_now, $chain; } } else { @chains_now = ( $chain ); $lowest_locant = $locant; } } return @chains_now; }; for my $rule ( sub { return @_ }, \&rule_longest_chains, \&rule_greatest_number_of_side_chains, # After this rule we are left with a set of longest chains all having the same number of side chains $rule_lowest_free_valence, \&rule_most_multiple_bonds, \&rule_most_double_bonds, \&rule_lowest_numbered_multiple_bonds, \&rule_lowest_numbered_locants, \&rule_most_carbon_in_side_chains, \&rule_least_branched_side_chains, \&pick_chain_with_lowest_attachments_alphabetically ) { my @chains_now = $rule->( @chains ); # CHECK: Can a rule cause disappearance of all chains? next unless @chains_now; @chains = @chains_now; # Narrow down the selection # If a single chain cannot be chosen now, pass on to the next rule next unless @chains == 1; return shift @chains; } die "cannot select a sidechain\n"; } # TODO: Should reflect the order described in BBv2 P-44 sub filter_chains { my( @chains ) = @_; for my $rule ( sub { return @_ }, # P-44.1.1: Maximum number of substituents of principal characteristic group. # This is not needed as select_mainchain() returns such chains. # TODO: P-44.1.2: Concerns rings # P-44.3.1: Maximum number of heteroatoms of any kind \&rule_most_heteroatoms, # P-44.3.2: Maximum number of skeletal atoms \&rule_longest_chains, # P-44.3.3: Maximum number of the most senior skeletal heteroatom \&rule_greatest_number_of_most_senior_heteroatoms, # P-44.4.1.1: Maximum number of multiple bonds \&rule_most_multiple_bonds, # P-44.4.1.2: Maximum number of double bonds \&rule_most_double_bonds, # TODO: P-44.4.1.3: Nonstandard bonding numbers # TODO: P-44.4.1.4: Concerns rings with indicated hydrogen # \&rule_most_indicated_hydrogens, # There is no such rule, but before comparing lists they have to be of the same size? # P-44.4.1.5: Lowest locants for heteroatoms in skeletal chain \&rule_lowest_numbered_heteroatoms, # P-44.4.1.6: Lowest locants for heteroatoms in skeletal chain according to heteroatom seniority \&rule_lowest_numbered_most_senior_heteroatoms, # TODO: P-44.4.1.7: Concerns fused rings # P-44.4.1.8: Lowest locants for suffix groups \&rule_lowest_numbered_senior_groups, # TODO: P-44.4.1.9: Concerns rings # TODO: P-44.4.1.10: Lowest locants for prefixes/suffixes expressing degrees of hydrogenation # This is not fully implemented now \&rule_lowest_numbered_multiple_bonds, # TODO: P-44.4.1.11: Concerns isotopes # TODO: P-44.4.1.12: Concerns stereogenic centers # TODO: P-45.1: Multiplication of identical senior parent structures # P-45.2.1: Maximum number of prefix substituents # FIXME: This includes suffix substituents now \&rule_greatest_number_of_side_chains, # P-45.2.2: Lowest locants for prefix substituents # FIXME: This includes suffix substituents now \&rule_lowest_numbered_locants, # TODO: P-45.2.3: Lowest locants for prefix substituents in their order of citation in the name # TODO: P-45.3: Nonstandard bond numbers # TODO: P-45.4: Concerns isotopes # TODO: P-45.5: Alphanumerical order of names (maybe covered by P-45.2.3 already?) # TODO: P-45.6: Concerns stereochemistry # TODO: Put these in correct order: \&rule_most_carbon_in_side_chains, \&rule_least_branched_side_chains, \&pick_chain_with_lowest_attachments_alphabetically ) { my @chains_now = $rule->( @chains ); if( $DEBUG ) { require Sub::Identify; print STDERR '>>> ', Sub::Identify::sub_name( $rule ), "\n"; } # CHECK: Can a rule cause disappearance of all chains? next unless @chains_now; @chains = @chains_now; # Narrow down the selection # If a single chain cannot be chosen now, pass on to the next rule return shift @chains if @chains == 1; } # TODO: Handle the case when none of the rules select proper chains return (); } sub rule_most_groups { my( $class, @chains ) = @_; my( $max_value ) = sort { $b <=> $a } map { $_->number_of_groups( $class ) } @chains; return grep { $_->number_of_groups( $class ) == $max_value } @chains; } sub rule_lowest_numbered_senior_groups { my( @chains ) = @_; my( $max_value ) = sort { cmp_arrays( [ $a->most_senior_group_positions ], [ $b->most_senior_group_positions ] ) } @chains; return grep { !cmp_arrays( [ $_->most_senior_group_positions ], [ $max_value->most_senior_group_positions ] ) } @chains; } sub rule_lowest_numbered_multiple_bonds { my @chains = @_; my( $max_value ) = sort { cmp_arrays( $a, $b ) } map { [ $_->multiple_bond_positions ] } @chains; return grep { !cmp_arrays( [ $_->multiple_bond_positions ], $max_value ) } @chains; } # This rule is employed only if longest chains are not already preselected sub rule_longest_chains { my( @chains ) = @_; my( $max_value ) = sort { $b <=> $a } uniq map { $_->length } @chains; return grep { $_->length == $max_value } @chains; } sub rule_greatest_number_of_side_chains { my( @chains ) = @_; my( $max_value ) = sort { $b <=> $a } uniq map { $_->number_of_branches } @chains; return grep { $_->number_of_branches == $max_value } @chains; } sub rule_lowest_numbered_locants { my( @chains ) = @_; my( $max_value ) = sort { cmp_arrays( [ $a->branch_positions ], [ $b->branch_positions ] ) } @chains; return grep { !cmp_arrays( [ $_->branch_positions ], [ $max_value->branch_positions ] ) } @chains; } sub rule_most_carbon_in_side_chains { my( @chains ) = @_; my( $max_value ) = sort { $b <=> $a } uniq map { $_->number_of_carbons } @chains; return grep { $_->number_of_carbons == $max_value } @chains; } sub rule_least_branched_side_chains { my( @chains ) = @_; my( $min_value ) = sort uniq map { $_->number_of_branches_in_sidechains } @chains; return grep { $_->number_of_branches_in_sidechains == $min_value } @chains; } sub rule_most_heteroatoms { my( @chains ) = @_; my( $max_value ) = sort { $b <=> $a } map { $_->number_of_heteroatoms } @chains; return grep { $_->number_of_heteroatoms == $max_value } @chains; } sub rule_greatest_number_of_most_senior_heteroatoms { my( @chains ) = @_; my( $max_value ) = sort { cmp_heteroatom_counts( $a, $b ) } map { [ $_->heteroatoms ] } @chains; return grep { !cmp_heteroatom_counts( [ $_->heteroatoms ], $max_value ) } @chains; } sub rule_most_multiple_bonds { my( @chains ) = @_; my( $max_value ) = sort { $b <=> $a } map { $_->number_of_multiple_bonds } @chains; return grep { $_->number_of_multiple_bonds == $max_value } @chains; } sub rule_most_double_bonds { my( @chains ) = @_; my( $max_value ) = sort { $b <=> $a } map { $_->number_of_double_bonds } @chains; return grep { $_->number_of_double_bonds == $max_value } @chains; } sub rule_most_indicated_hydrogens { my( @chains ) = @_; my( $max_value ) = sort { $b <=> $a } map { $_->number_of_indicated_hydrogens } @chains; return grep { $_->number_of_indicated_hydrogens == $max_value } @chains; } sub rule_lowest_numbered_heteroatoms { my( @chains ) = @_; my( $max_value ) = sort { cmp_arrays( [ $a->heteroatom_positions ], [ $b->heteroatom_positions ] ) } @chains; return grep { !cmp_arrays( [ $_->heteroatom_positions ], [ $max_value->heteroatom_positions ] ) } @chains; } # Chains given for this rule have the same number of heteroatoms at the same positions. sub rule_lowest_numbered_most_senior_heteroatoms { my( @chains ) = @_; my( $max_value ) = sort { cmp_heteroatom_seniority( [ $a->heteroatoms ], [ $b->heteroatoms ] ) } @chains; return grep { !cmp_heteroatom_seniority( [ $_->heteroatoms ], [ $max_value->heteroatoms ] ) } @chains; } sub pick_chain_with_lowest_attachments_alphabetically { my( @chains ) = @_; my @locant_names = map { [ $_->locant_names ] } @chains; my @sorted = sort { cmp_attachments( $locant_names[$a], $locant_names[$b] ) } 0..$#locant_names; return $chains[$sorted[0]]; } sub most_senior_groups { my( @vertices ) = @_; my $graph; if( @vertices == 1 && blessed $vertices[0] && $vertices[0]->isa( Graph::Undirected:: ) ) { # Graph given instead of an array of vertices $graph = shift @vertices; @vertices = $graph->vertices; } my @groups = grep { blessed $_ && $_->isa( ChemOnomatopist::Group:: ) && !$_->is_prefix_only } @vertices; # TODO: For now grep is used, in future only ChemOnomatopist::Group subclasses should remain if( $graph && $graph->isa( ChemOnomatopist::MolecularGraph:: ) ) { push @groups, grep { $_->isa( ChemOnomatopist::Group:: ) } $graph->groups; } return unless @groups; my( $most_senior_group ) = sort { ChemOnomatopist::Group::cmp( $a, $b ) } @groups; return grep { !ChemOnomatopist::Group::cmp( $_, $most_senior_group ) } @groups; } # Given two lists of heteroatoms, return the one with the most senior ones sub cmp_heteroatom_counts { my( $A, $B ) = @_; my( %A, %B ); for (@$A) { $A{$_}++ } for (@$B) { $B{$_}++ } my @elements = sort { $elements{$a}->{seniority} <=> $elements{$b}->{seniority} } grep { $elements{$_}->{seniority} >= 5 } # O and after keys %elements; for (@elements) { $A{$_} = 0 unless $A{$_}; $B{$_} = 0 unless $B{$_}; return $B{$_} <=> $A{$_} if $B{$_} <=> $A{$_}; } return 0; } sub cmp_heteroatom_seniority { my( $A, $B ) = @_; for (0..$#$A) { next unless $elements{$A->[$_]}->{seniority} <=> $elements{$B->[$_]}->{seniority}; return $elements{$A->[$_]}->{seniority} <=> $elements{$B->[$_]}->{seniority}; } return 0; } # BBv2 P-82.2.1 sub cmp_isotopes { my @mass; my @symbol; for (@_) { if( /^(\d+)(\D+)$/ ) { push @mass, $1; push @symbol, $2; } } return $symbol[0] cmp $symbol[1] || $mass[0] cmp $mass[1]; } # Sorts given names only based on alphabetical part of the name. # tert compounds are ordered according to BBv2 P-14.5 which says: # "[t]he preferred order for alphanumerical order is: nonitalic Roman letters > italic letters > Greek letters." sub cmp_only_aphabetical { my( $a, $b ) = @_; # Dropping hydrogen indicators $a =~ s/^\d+H-//; $b =~ s/^\d+H-//; $a =~ s/[^a-zA-Z]+//g; $b =~ s/[^a-zA-Z]+//g; my $a_has_tert = $a =~ s/^tert(butyl)$/$1/; my $b_has_tert = $b =~ s/^tert(butyl)$/$1/; return $a_has_tert <=> $b_has_tert if $a_has_tert <=> $b_has_tert; return $a cmp $b; } # Sorts arrays from lowest to biggest by values sub cmp_arrays { my( $a, $b ) = @_; for (0..min( scalar( @$a ), scalar( @$b ) )-1) { return $a->[$_] <=> $b->[$_] if $a->[$_] <=> $b->[$_]; } return @$a <=> @$b; } # Sorts given names only based on alphabetical part of the name sub cmp_attachments { my( $a, $b ) = @_; my @a = @{$a}; my @b = @{$b}; for (0..$#a) { my $a_alpha = $a[$_]; my $b_alpha = $b[$_]; my @A = ref $a_alpha eq 'ARRAY' ? sort @$a_alpha : ( $a_alpha ); my @B = ref $b_alpha eq 'ARRAY' ? sort @$b_alpha : ( $b_alpha ); for (0..min( scalar( @A ), scalar( @B ) )-1) { my $a_alpha = $A[$_]; my $b_alpha = $B[$_]; $a_alpha =~ s/[^a-zA-Z]+//g; $b_alpha =~ s/[^a-zA-Z]+//g; my $a_has_tert = $a_alpha =~ s/^tert(butyl)$/$1/; my $b_has_tert = $b_alpha =~ s/^tert(butyl)$/$1/; return $a_has_tert <=> $b_has_tert if $a_has_tert <=> $b_has_tert; return $a_alpha cmp $b_alpha if $a_alpha cmp $b_alpha; } } return 0; } # According to https://en.wikipedia.org/wiki/IUPAC_numerical_multiplier sub IUPAC_numerical_multiplier { my( $N, $is_middle ) = @_; my $ones = $N % 10; my $tens = int( $N / 10 ) % 10; my $hundreds = int( $N / 100 ) % 10; my $thousands = int( $N / 1000 ) % 10; my @prefix = ( '', 'hen', 'di', 'tri', 'tetra', 'penta', 'hexa', 'hepta', 'octa', 'nona' ); return 'heni' if $N == 1 && $is_middle; return 'mono' if $N == 1; return 'do' if $N == 2 && $is_middle; if( $N < 10 ) { my $value = $prefix[$ones]; $value =~ s/a$// unless $is_middle; return $value; } return 'dec' . ($is_middle ? 'a' : '') if $N == 10; return 'undec' . ($is_middle ? 'a' : '') if $N == 11; return IUPAC_numerical_multiplier( $ones, 1 ) . 'dec' . ($is_middle ? 'a' : '') if $N < 20; return 'icos' . ($is_middle ? 'a' : '') if $N == 20; return IUPAC_numerical_multiplier( $ones, 1 ) . 'cos' . ($is_middle ? 'a' : '') if $N < 30; if( $N < 100 ) { return ($ones == 1 ? $prefix[$ones] : IUPAC_numerical_multiplier( $ones, 1 )) . IUPAC_numerical_multiplier( $tens, 1 ) . ($tens == 3 ? 'a' : '') . 'cont' . ($is_middle ? 'a' : ''); } if( $N < 1000 ) { my $prefix = int( $tens . $ones ) == 1 ? $prefix[$ones] : IUPAC_numerical_multiplier( int( $tens . $ones ), 1 ); $prefix[1] = 'he'; return $prefix . $prefix[$hundreds] . 'ct' . ($is_middle ? 'a' : ''); } if( $N < 10000 ) { $prefix[0] = 'ki'; return IUPAC_numerical_multiplier( int( $hundreds . $tens . $ones ), 1 ) . $prefix[$thousands] . 'li'; } die "cannot generate IUPAC numerical multiplier for $N\n"; } sub IUPAC_complex_numerical_multiplier { my( $N ) = @_; my @multipliers = ( undef, '', 'bis', 'tris' ); return $multipliers[$N] if $N < @multipliers; return IUPAC_numerical_multiplier( $N, 1 ) . 'kis'; } sub alkane_chain_name($) { my( $N ) = @_; die "alkane chain of zero length detected\n" unless $N; my @names = qw( ? meth eth prop but ); return $names[$N] if $N < @names; return IUPAC_numerical_multiplier( $N ); } sub unbranched_chain_name($) { my( $chain ) = @_; my @chain = $chain->vertices; my $name = ChemOnomatopist::Name->new; if( $chain->length == 1 && !blessed $chain[0] && !is_element( @chain, 'C' ) ) { $name .= 'ne'; # Leaving element prefix appending to the caller return $name; } my @bonds = $chain->bonds; my @double = grep { $bonds[$_] eq '=' } 0..$#bonds; my @triple = grep { $bonds[$_] eq '#' } 0..$#bonds; # BBv2 P-63.2.2.2 if( $chain->parent && (all { !blessed $_ } @chain) && is_element( $chain[0], 'O' ) && !@double && !@triple && all { is_element( $_, 'C' ) } @chain[1..$#chain] ) { $name->append_stem( alkane_chain_name( $chain->length - 1 ) ); $name .= 'oxy'; return $name; } if( $chain->isa( ChemOnomatopist::Chain::Amide:: ) || $chain->isa( ChemOnomatopist::Chain::Amine:: ) ) { $name->append_stem( alkane_chain_name scalar grep { !blessed $_ } $chain->vertices ); } elsif( (any { is_element( $_, 'C' ) } @chain) || scalar( uniq map { element $_ } @chain ) > 1 ) { $name->append_stem( alkane_chain_name $chain->length ); } if( @double ) { $name .= 'a' if @double >= 2; # BBv2 P-16.8.2 if( $chain->needs_multiple_bond_locants || @double > 1 || @triple ) { $name->append_locants( $chain->bond_locants( @double ) ); } if( @double > 1 ) { my $multiplier = IUPAC_numerical_multiplier scalar @double; $multiplier .= 'a' unless $multiplier =~ /i$/; # BBv2 P-31.1.1.2 $name->append_multiplier( $multiplier ); } $name .= 'en'; } if( @triple ) { $name .= 'a' if @triple >= 2 && !@double; # BBv2 P-16.8.2 if( $chain->needs_multiple_bond_locants || @triple > 1 || @double ) { $name->append_locants( $chain->bond_locants( @triple ) ); } if( @triple > 1 ) { my $multiplier = IUPAC_numerical_multiplier scalar @triple; $multiplier .= 'a' unless $multiplier =~ /i$/; # BBv2 P-31.1.1.2 $name->append_multiplier( $multiplier ); } $name .= 'yn'; } $name .= ChemOnomatopist::Name::Part::AlkaneANSuffix->new( 'an' ) unless @double || @triple; $name .= 'e'; return $name; } 1; ChemOnomatopist-0.6.1/lib/ChemOnomatopist/000077500000000000000000000000001452012116100204775ustar00rootroot00000000000000ChemOnomatopist-0.6.1/lib/ChemOnomatopist/Chain.pm000066400000000000000000000321441452012116100220630ustar00rootroot00000000000000package ChemOnomatopist::Chain; use strict; use warnings; use ChemOnomatopist; use ChemOnomatopist::Chain::Ether; use ChemOnomatopist::Elements qw( %elements ); use ChemOnomatopist::Group::Carboxyl; use ChemOnomatopist::Group::Ether; use ChemOnomatopist::Util::SMILES qw( path_SMILES ); use Graph::Traversal::DFS; use List::Util qw( all any sum0 uniq ); use Scalar::Util qw( blessed ); use Set::Object qw( set ); # ABSTRACT: Chain of atoms # VERSION sub vertices(); sub new { my( $class, $graph, $parent, @vertices ) = @_; # TODO: For now only chains with a single oxygen atom are detected as ethers. # First of all, ChemOnomatopist::Chain::Ether is not very clever. # Second, BBv2 P-63.2.4.1 does not draw a clear line between ether naming and skeletal replacement nomenclature. my $self; if( (grep { blessed $_ && $_->isa( ChemOnomatopist::Group::Ether:: ) } @vertices) == 1 ) { $self = ChemOnomatopist::Chain::Ether->new( $graph, $parent, @vertices ); } elsif( blessed $vertices[0] && $vertices[0]->isa( ChemOnomatopist::Group::Amine:: ) ) { my $amine = shift @vertices; my $chain = bless { vertices => \@vertices, graph => $graph }, $class; $self = ChemOnomatopist::Chain::Amine->new( $graph, $chain, $amine ); } else { $self = { vertices => \@vertices, graph => $graph, cache => {} }; $self->{parent} = $parent if $parent; $self = bless $self, $class; } return $self; } # Accessors sub graph() { my( $self ) = @_; return $self->{graph}; } sub parent(;$) { my( $self, $parent ) = @_; my $old_parent = exists $self->{parent} ? $self->{parent} : undef; $self->{parent} = $parent if $parent; # TODO: Maybe invalidate the related cache return $old_parent; } sub substituents() { my( $self ) = @_; my $vertices = set( $self->vertices ); my @substituents; for my $vertex ($self->vertices) { for my $neighbour ($self->graph->neighbours( $vertex )) { next if $vertices->has( $neighbour ); push @substituents, $neighbour; } } return @substituents; } sub vertices() { my( $self ) = @_; return @{$self->{vertices}}; } # Properties sub backbone_SMILES() { my( $self ) = @_; return path_SMILES( $self->graph, $self->vertices ); } sub bonds() { my( $self ) = @_; return @{$self->{bonds}} if $self->{bonds}; my $graph = $self->graph; my @vertices = $self->vertices; my @bonds; for (0..$self->length-2) { if( $graph->has_edge_attribute( @vertices[$_ .. $_+1], 'bond' ) ) { push @bonds, $graph->get_edge_attribute( @vertices[$_ .. $_+1], 'bond' ); } else { push @bonds, '-'; } } $self->{bonds} = \@bonds; return @bonds; } sub branch_positions() { my( $self ) = @_; return @{$self->{branch_positions}} if $self->{branch_positions}; my $graph = $self->_disconnected_chain_graph; my @vertices = $self->vertices; my @branch_positions = map { ( $_ ) x $graph->degree( $vertices[$_] ) } grep { $graph->degree( $vertices[$_] ) } 0..$#vertices; $self->{branch_positions} = \@branch_positions; return @branch_positions; } sub group_positions { my( $self, $class ) = @_; return @{$self->{group_positions}{$class}} if $self->{group_positions}{$class}; my $graph = $self->_disconnected_chain_graph; my @vertices = $self->vertices; my @group_positions; for (0..$#vertices) { my $groups = grep { blessed $_ && $_->isa( $class ) } $graph->neighbours( $vertices[$_] ); next unless $groups; push @group_positions, ( $_ ) x $groups; } $self->{group_positions}{$class} = \@group_positions; return @group_positions; } sub heteroatom_positions() { my( $self ) = @_; return @{$self->{heteroatom_positions}} if $self->{heteroatom_positions}; my @vertices = $self->vertices; my @heteroatom_positions; for (0..$#vertices) { next if blessed $vertices[$_] && !$vertices[$_]->isa( ChemOnomatopist::Group::Ether:: ); next if ChemOnomatopist::is_element( $vertices[$_], 'C' ); push @heteroatom_positions, $_; } $self->{heteroatom_positions} = \@heteroatom_positions; return @heteroatom_positions; } # CHECKME: Can chains have indicated hydrogens? sub indicated_hydrogens() { my @hydrogen_positions; return @hydrogen_positions } sub is_hydrocarbon() { my( $self ) = @_; return $self->number_of_heteroatoms == 0; } sub is_saturated() { my( $self ) = @_; return all { $_ eq '-' } $self->bonds; } # Returns maximum number of substitutable locations sub max_valence() { my( $self ) = @_; my $max_valence = 0; for my $vertex ($self->vertices) { my $element = ChemOnomatopist::element( $vertex ); next unless $element; next if !exists $elements{$element}; next if !exists $elements{$element}->{standard_bonding_number}; $max_valence += $elements{$element}->{standard_bonding_number}; } for my $bond ($self->bonds) { $max_valence -= 2 if $bond eq '-'; $max_valence -= 3 if $bond eq ':'; # Aromatic bond is 1.5 $max_valence -= 4 if $bond eq '='; $max_valence -= 6 if $bond eq '#'; $max_valence -= 8 if $bond eq '$'; } return $max_valence; } sub most_senior_groups() { my( $self ) = @_; return ChemOnomatopist::most_senior_groups( $self->vertices, $self->substituents ); } sub most_senior_group_positions() { my( $self ) = @_; return @{$self->{most_senior_group_positions}} if $self->{most_senior_group_positions}; my @groups = ChemOnomatopist::most_senior_groups( $self->vertices, $self->substituents ); $self->{most_senior_group_positions} = []; return () unless @groups; my $groups = set( @groups ); my @vertices = $self->vertices; my $vertices = set( @vertices ); my @positions; for (0..$#vertices) { my $vertex = $vertices[$_]; push @positions, $_ if $groups->has( $vertex ); for my $neighbour ($self->{graph}->neighbours( $vertex )) { next if $vertices->has( $neighbour ); push @positions, $_ if $groups->has( $neighbour ); } } $self->{most_senior_group_positions} = \@positions; return @positions; } sub multiple_bond_positions() { my( $self ) = @_; my @bonds = $self->bonds; return grep { $bonds[$_] =~ /^[=#\$]$/ } 0..$#bonds; } sub needs_multiple_bond_locants() { my( $self ) = @_; return $self->length > 2; } sub needs_multiple_bond_suffix() { my( $self ) = @_; my( $first ) = $self->vertices; return 1 unless blessed $first; return $first->needs_multiple_bond_suffix; } sub needs_heteroatom_locants() { my( $self ) = @_; return '' if $self->length == 1; my @vertices = $self->vertices; # Check if this is -oxy substituent if( $self->parent && !$self->number_of_branches && $self->number_of_heteroatoms == 1 && $vertices[0]->{symbol} eq 'O' ) { return ''; } if( scalar( uniq $self->heteroatoms ) == 1 ) { return $self->number_of_heteroatoms != $self->length; } elsif( scalar( uniq $self->heteroatoms ) > 1 ) { return 1; } } sub needs_heteroatom_names() { my( $self ) = @_; my @vertices = $self->vertices; # Check if this is -oxy substituent if( $self->parent && !$self->number_of_branches && $self->number_of_heteroatoms == 1 && $vertices[0]->{symbol} eq 'O' ) { return ''; } # Chalcogen analogues of ethers if( @vertices == 1 && grep { ChemOnomatopist::element( @vertices ) eq $_ } qw( S Se Te ) ) { return ''; } return 1; } sub needs_suffix_locant() { my( $self ) = @_; # BBv2 P-14.3.4.2 (a): mononuclear parent hydrides do not need locants return '' if $self->length == 1; # BBv2 P-14.3.4.2 (b): monosubstituted homogeneous chains consisting of only two identical atoms do not need locants return '' if $self->length == 2 && $self->number_of_branches == 1; my @most_senior_groups = $self->most_senior_groups; return '' unless @most_senior_groups; return 1 if $self->number_of_heteroatoms; # P-15.4.3.2.3: Characteristic groups cited as suffixes are given locants return 1 if !$most_senior_groups[0]->is_carbon || @most_senior_groups > 2; return ''; } sub needs_substituent_locants() { my( $self ) = @_; return '' if $self->length == 1; # FIXME: Make sure the substituents are of the same kind return '' if scalar( $self->substituents ) == $self->max_valence; # Ad-hoc fix for acetic acid substituents if( $self->length == 2 && any { blessed $_ && $_->isa( ChemOnomatopist::Group::Carboxyl:: ) } $self->vertices ) { return ''; } return 1; } sub heteroatoms() { my( $self ) = @_; my @vertices = $self->vertices; return map { ChemOnomatopist::element( $vertices[$_] ) } $self->heteroatom_positions; } sub length() { my( $self ) = @_; return scalar $self->vertices; } sub locant_names() { my( $self ) = @_; return @{$self->{locant_names}} if $self->{locant_names}; my $graph = $self->_disconnected_chain_graph->copy; my @locants; for my $vertex ($self->vertices) { my @current_locants; for my $neighbour ($graph->neighbours( $vertex )) { $graph->delete_edge( $vertex, $neighbour ); next if $self->parent && $self->parent == $neighbour; if( blessed $neighbour ) { push @current_locants, $neighbour->prefix; } else { push @current_locants, ChemOnomatopist::get_sidechain_name( $graph, $vertex, $neighbour ); } } push @locants, \@current_locants; } $self->{locant_names} = \@locants; return @locants; } sub locants(@) { my $self = shift; return map { $_ + 1 } @_; } sub bond_locants(@) { my $self = shift; return map { $_ + 1 } @_; } sub number_of_branches_in_sidechains() { my( $self ) = @_; return $self->{number_of_branches_in_sidechains} if exists $self->{number_of_branches_in_sidechains}; my $graph = $self->_disconnected_chain_graph->copy; my @vertex_neighbours = map { $graph->neighbours( $_ ) } $self->vertices; $graph->delete_vertices( $self->vertices ); # has_vertex() is used to filter out neighbours within the chain my $number = sum0 map { $_ > 2 ? $_ - 2 : 0 } map { $graph->degree( $_ ) } map { Graph::Traversal::DFS->new( $graph, start => $_ )->dfs } grep { $graph->has_vertex( $_ ) } @vertex_neighbours; $self->{number_of_branches_in_sidechains} = $number; return $number; } sub number_of_carbons() { my( $self ) = @_; return $self->{number_of_carbons} if exists $self->{number_of_carbons}; my $graph = $self->_disconnected_chain_graph; my $C = grep { ChemOnomatopist::is_element( $_, 'C' ) } map { Graph::Traversal::DFS->new( $graph, start => $_ )->dfs } $self->vertices; # Since main chain carbons are included in the count, they have to be subtracted. $C -= $self->length; $self->{number_of_carbons} = $C; return $C; } sub number_of_branches() { my( $self ) = @_; return scalar $self->branch_positions; } sub number_of_double_bonds() { my( $self ) = @_; return scalar grep { $_ eq '=' } $self->bonds; } sub number_of_groups { my( $self, $class ) = @_; return scalar $self->group_positions( $class ); } sub number_of_heteroatoms() { my( $self ) = @_; return scalar $self->heteroatom_positions; } sub number_of_indicated_hydrogens() { my( $self ) = @_; return scalar $self->indicated_hydrogens; } sub number_of_multiple_bonds() { my( $self ) = @_; return scalar grep { $_ =~ /^[=#\$]$/ } $self->bonds; } sub prefix() { my( $self ) = @_; # Chalcogen analogues of ethers if( $self->length == 1 ) { return ChemOnomatopist::Name->new( 'sulfan' ) if ChemOnomatopist::is_element( $self->vertices, 'S' ); return ChemOnomatopist::Name->new( 'selan' ) if ChemOnomatopist::is_element( $self->vertices, 'Se' ); return ChemOnomatopist::Name->new( 'tellan' ) if ChemOnomatopist::is_element( $self->vertices, 'Te' ); } return ChemOnomatopist::unbranched_chain_name( $self ); # FIXME: Add proper suffix } sub suffix() { my( $self ) = @_; return ChemOnomatopist::unbranched_chain_name( $self ); } sub vertex_ids { my $self = shift; my %ids = map { $self->{vertices}[$_] => $_ } 0..$self->length-1; return map { exists $ids{$_} ? $ids{$_} : undef } @_; } sub _cmp_instances { my( $A, $B ) = @_; # For now, just compare the sizes of cycles. # TODO: Proper ordering should be implemented as per BBv2 P-25.8.1 return $B->length <=> $A->length; } sub _disconnected_chain_graph() { my( $self ) = @_; my $graph = $self->graph->copy; my $vertices = set( $self->vertices ); $graph->delete_edges( map { @$_ } grep { $vertices->has( @$_ ) } $graph->edges ); return $graph; } 1; ChemOnomatopist-0.6.1/lib/ChemOnomatopist/Chain/000077500000000000000000000000001452012116100215215ustar00rootroot00000000000000ChemOnomatopist-0.6.1/lib/ChemOnomatopist/Chain/Amide.pm000066400000000000000000000015621452012116100231020ustar00rootroot00000000000000package ChemOnomatopist::Chain::Amide; use strict; use warnings; # ABSTRACT: Amide chain # VERSION use ChemOnomatopist; use ChemOnomatopist::Name; use List::Util qw( first ); use Scalar::Util qw( blessed ); sub AUTOLOAD { our $AUTOLOAD; my $call = $AUTOLOAD; $call =~ s/.*:://; return if $call eq 'DESTROY'; my $self = shift; return $self->{chain}->can( $call )->( $self->{chain}, @_ ); } sub new { my( $class, $graph, $chain, $amide ) = @_; return bless { graph => $graph, chain => $chain, amide => $amide }; } sub vertices() { my $self = shift; my @vertices = ( $self->{amide}, $self->{chain}->vertices ); return @vertices; } sub locants(@) { my $self = shift; return map { $_ ? $_ : 'N' } @_; } sub needs_substituent_locants() { return 1 } sub suffix() { my( $self ) = @_; return $self->{chain}->suffix; } 1; ChemOnomatopist-0.6.1/lib/ChemOnomatopist/Chain/Amine.pm000066400000000000000000000026721452012116100231170ustar00rootroot00000000000000package ChemOnomatopist::Chain::Amine; use strict; use warnings; # ABSTRACT: Amine chain # VERSION use ChemOnomatopist; use ChemOnomatopist::Name; use List::Util qw( first ); use Scalar::Util qw( blessed ); sub AUTOLOAD { our $AUTOLOAD; my $call = $AUTOLOAD; $call =~ s/.*:://; return if $call eq 'DESTROY'; my $self = shift; return $self->{chain}->can( $call )->( $self->{chain}, @_ ); } sub new { my( $class, $graph, $chain, $amine ) = @_; return bless { graph => $graph, chain => $chain, amine => $amine }; } sub vertices() { my $self = shift; my @vertices = ( $self->{amine}, $self->{chain}->vertices ); return @vertices; } sub locants(@) { my $self = shift; return map { $_ ? $self->{chain}->locants( $_ - 1 ) : 'N' } @_; } sub needs_substituent_locants() { my( $self ) = @_; return $self->{chain}->length > 0; } sub prefix() { my( $self ) = @_; my $prefix = $self->{chain}->prefix; $prefix .= 'amino'; return $prefix; } sub suffix() { my( $self ) = @_; my $suffix = $self->{chain}->suffix; return $suffix unless $self->{chain}->needs_suffix_locant; return $suffix if $self->{chain}->length == 2; # Ad-hoc fix for ethanamines my $neighbour = first { $self->graph->has_edge( $self->{amine}, $_ ) } $self->{chain}->vertices; return $suffix->append_locants( $self->{chain}->locants( $self->vertex_ids( $neighbour ) ) ); } 1; ChemOnomatopist-0.6.1/lib/ChemOnomatopist/Chain/Bicycle.pm000066400000000000000000000406201452012116100234330ustar00rootroot00000000000000package ChemOnomatopist::Chain::Bicycle; use strict; use warnings; # ABSTRACT: Fused bicyclic chain # VERSION use ChemOnomatopist; use ChemOnomatopist::Chain::Circular; use ChemOnomatopist::Elements qw( %elements ); use ChemOnomatopist::Chain::Bicycle::Purine; use ChemOnomatopist::Chain::Monocycle; use ChemOnomatopist::Chain::Monocycle::Fused; use ChemOnomatopist::Name; use ChemOnomatopist::Name::Part::Fusion; use ChemOnomatopist::Name::Part::Stem; use ChemOnomatopist::Util::SMILES qw( cycle_SMILES ); use Chemistry::OpenSMILES qw( is_double_bond ); use Graph::Traversal::DFS; use List::Util qw( all any min uniq ); use Scalar::Util qw( blessed ); use Set::Object qw( set ); use parent ChemOnomatopist::Chain::Circular::; # From BBv2 P-25.2.1 our @names = ( [ 'n:c:n:c:c:c:', 'n:c:c:n:c:c:', 'pteridine' ], [ 'c:n:c:n:c:c:', 'n:c:c:n:c:c:', 'pteridine' ], [ 'n:n:c:c:c:c:', 'c:c:c:c:c:c:', 'cinnoline' ], [ 'N=CN=CCC', 'c:c:c:c:c:c:', 'quinazoline' ], [ 'N=CC=NCC', 'c:c:c:c:c:c:', 'quinoxaline' ], [ 'n:c:c:n:c:c:', 'c:c:c:c:c:c:', 'quinoxaline' ], [ 'c:c:c:n:c:c:', 'n:c:c:c:c:c:', '1,5-naphthyridine' ], # TODO: There are isomers [ 'c:n:n:c:c:c:', 'c:c:c:c:c:c:', 'phthalazine' ], [ 'n:c:c:c:c:c:', 'c:c:c:c:c:c:', 'quinoline' ], [ 'C=NC=CCC', 'c:c:c:c:c:c:', 'isoquinoline' ], [ 'c:n:c:c:c:c:', 'c:c:c:c:c:c:', 'isoquinoline' ], [ 'CC=CCNC', 'C=CC=CCN', 'quinolizine' ], [ 'n:n:c:c:c:', 'c:c:c:c:c:c:', '1H-indazole' ], [ 'n:c:c:c:c:', 'c:c:c:c:c:c:', '1H-indole' ], [ 'c:n:c:c:c:', 'c:c:c:c:c:c:', 'isoindole' ], [ 'c:c:c:c:n:', 'c:c:c:c:n:c:', 'indolizine', ], [ 'CC=Cn:c', 'c:c:c:c:n:', '1H-pyrrolizine' ], # TODO: There are isomers ); for my $name (qw( 1H-indole indolizine isoindole isoquinoline quinoline quinolizine )) { for (grep { $_->[2] eq $name } @names) { my @As_parts = @$_; $As_parts[0] =~ s/N/\[As\]/g; $As_parts[1] =~ s/N/\[As\]/g; $As_parts[2] =~ s/^1H-//; $As_parts[2] = 'ars' . $As_parts[2] unless $As_parts[2] =~ s/^iso/isoars/; push @names, \@As_parts; my @P_parts = @$_; $P_parts[0] =~ s/n/p/g; $P_parts[1] =~ s/n/p/g; $P_parts[2] =~ s/^1H-//; $P_parts[2] = 'phosph' . $P_parts[2] unless $P_parts[2] =~ s/^iso/isophosph/; push @names, \@P_parts; } } # From BBv2 P-25.1.1, order of decreasing seniority our %hydrocarbons_by_size = ( '5,7' => 'azulene', '6,6' => 'naphthalene', '5,6' => 'indene', ); sub new { my( $class, $graph, @vertices ) = @_; # Has to be created early to be given for fused parts my $self = bless { graph => $graph, vertices => \@vertices }, $class; my $subgraph = $graph->subgraph( \@vertices ); my @bridge = grep { $subgraph->degree( $_ ) == 3 } @vertices; $subgraph->delete_edge( @bridge ); $self->{vertices} = [ Graph::Traversal::DFS->new( $subgraph, start => $bridge[0] )->dfs ]; $subgraph->delete_vertices( @bridge ); # Graph is broken into components. # Each component is represented as an array of vertices in the order of traverse. my @components = sort { @$a <=> @$b } $subgraph->connected_components; for (0..1) { my $subgraph = $graph->subgraph( [ @{$components[$_]}, @bridge ] ); $subgraph->delete_edge( @bridge ); my @path = Graph::Traversal::DFS->new( $subgraph, start => $bridge[$_] )->dfs; push @path, shift @path; $components[$_] = \@path; } my @cycles = map { ChemOnomatopist::Chain::Monocycle::Fused->new( $graph, $self, @$_ ) } @components; $self->{cycles} = \@cycles; my $nbenzene = scalar grep { $_->is_benzene } @cycles; # The ordering should not be done if one of the cycles is benzene if( $nbenzene == 0 ) { # The following code is supposed to order the rings _and_ establish the traversal order # TODO: Maybe all traversals of both rings should be generated here? # FIXME: There seem to be two orders for rings: one for numbering, other for fusion naming... my @candidates = map { ChemOnomatopist::Chain::Monocycle->new( $_->graph, $_->vertices ) } ( @cycles, map { $_->flipped } @cycles ); for my $rule ( # P-25.3.2.4 (a): Senior heteroatom according to specific seniority order \&rule_most_senior_heteroatom, # TODO: P-25.3.2.4 (b): Concerns fusions of more than two rings # P-25.3.2.4 (c): Second ring has to be larger \&ChemOnomatopist::rule_longest_chains, # P-25.3.2.4 (d): Greater number of heteroatoms of any kind \&ChemOnomatopist::rule_most_heteroatoms, # P-25.3.2.4 (e): Greater variety of heteroatoms \&rule_greatest_variety_of_heteroatoms, # P-25.3.2.4 (f): Greater number of most senior heteroatoms \&ChemOnomatopist::rule_greatest_number_of_most_senior_heteroatoms, # TODO: P-25.3.2.4 (g): Concerns fusions of more than two rings # P-25.3.2.4 (h): Lower locants for heteroatoms \&ChemOnomatopist::rule_lowest_numbered_heteroatoms, # P-25.3.2.4 (i): Lower locants for senior heteroatoms \&ChemOnomatopist::rule_lowest_numbered_most_senior_heteroatoms, # TODO: P-25.3.2.4 (j): Concerns fusions of more than two rings ) { my @candidates_now = $rule->( @candidates ); if( @candidates_now == 1 ) { @candidates = @candidates_now; last; } elsif( @candidates ) { @candidates = @candidates_now; } else { last; } } # Here the "winning" ring is selected my $chain = shift @candidates; # Making the "winning" ring the first if( set( $chain->vertices ) == set( $cycles[1]->vertices ) ) { @cycles = reverse @cycles; } # Now we have to match the direction, I guess my @bridge_in_chain = grep { set( @bridge )->has( $_ ) } $chain->vertices; if( $bridge_in_chain[-1] != $cycles[0]->{vertices}[-1] ) { @cycles = map { $_->flipped } @cycles; } $self->{cycles} = \@cycles; $self->_adjust_vertices_to_cycles; if( join( ',', map { $_->backbone_SMILES } @cycles ) =~ /^c:n:c:n:c:c:,n:c:n:c:c:$/ || join( ',', map { $_->backbone_SMILES } @cycles ) =~ /^CNC=Nc:c,n:c:n:c:c:$/ ) { return ChemOnomatopist::Chain::Bicycle::Purine->new( $graph, @cycles ); } elsif( join( ',', map { $_->backbone_SMILES } @cycles ) =~ /^N=CNCc:c,n:c:n:c:c:$/ ) { return ChemOnomatopist::Chain::Bicycle::Purine->new( $graph, map { $_->flipped } @cycles ); } } elsif( $nbenzene == 1 ) { # Numbering has to start from cycle other than benzene if( $cycles[0]->is_benzene ) { @cycles = reverse @cycles; $self->{cycles} = \@cycles; } my( $chain ) = sort { ChemOnomatopist::Chain::Monocycle::_cmp( $a, $b ) } ( $cycles[0], $cycles[0]->flipped ); if( $chain != $cycles[0] ) { @cycles = map { $_->flipped } @cycles; $self->{cycles} = \@cycles; } $self->_adjust_vertices_to_cycles; } return $self; } sub candidates() { my( $self ) = @_; if( $self->is_naphthalene ) { # Generates all variants my @chains = ( $self, $self->copy, $self->copy, $self->copy ); $chains[1]->{cycles} = [ map { $_->flipped } $chains[1]->cycles ]; $chains[3]->{cycles} = [ map { $_->flipped } $chains[3]->cycles ]; $chains[2]->{cycles} = [ reverse $chains[2]->cycles ]; $chains[3]->{cycles} = [ reverse $chains[3]->cycles ]; for (@chains) { $_->_adjust_vertices_to_cycles; $_->{candidate_for} = $self unless $_ == $self; } return @chains; } return $self; } sub copy() { my( $self ) = @_; return bless { graph => $self->graph, cycles => [ $self->cycles ], vertices => [ $self->vertices ], parent => $self->parent }, ChemOnomatopist::Chain::Bicycle::; } sub cycles() { my( $self ) = @_; return @{$self->{cycles}}; } sub parent(;$) { my( $self, $parent ) = @_; my $old_parent = $self->SUPER::parent( $parent ); return $old_parent unless $parent; return $old_parent if $old_parent && $parent == $old_parent; if( $self->is_naphthalene ) { my( $chain ) = ChemOnomatopist::filter_chains( $self->candidates ); $self->{vertices} = [ $chain->vertices ]; } return $old_parent; } sub has_form($$) { my( $class, $graph ) = @_; my %degrees = map { $graph->degree( $_ ) => 1 } $graph->vertices; return '' unless join( ',', sort keys %degrees ) eq '2,3'; my @d3 = grep { $graph->degree( $_ ) == 3 } $graph->vertices; return '' unless @d3 == 2; return '' unless $graph->has_edge( @d3 ); $graph = $graph->copy->delete_vertices( @d3 ); return '' unless scalar( $graph->connected_components ) == 2; return 1; } # Tells whether the outer bonds of the bicycle qualify as aromatic sub is_aromatic() { my( $self ) = @_; my @outer_vertices; for ($self->cycles) { my @vertices = $_->vertices; pop @vertices; push @outer_vertices, @vertices; } return ChemOnomatopist::Chain::Circular->new( $self->graph, @outer_vertices )->is_aromatic; } sub is_hydrocarbon() { my( $self ) = @_; return all { $_->is_hydrocarbon } $self->cycles; } sub is_naphthalene() { my( $self ) = @_; return $self->is_hydrocarbon && all { $_->length == 6 } $self->cycles; } sub needs_heteroatom_locants() { my( $self ) = @_; return $self->suffix =~ /^benzo/; } sub needs_heteroatom_names() { return '' } # FIXME: This is not always correct sub needs_substituent_locants() { return 1 } sub prefix() { my( $self ) = @_; my $name = $self->suffix; $name = ChemOnomatopist::Name->new( $name ) unless blessed $name; $name->{name}[-1] =~ s/e$//; if( $self->parent ) { # FIXME: Not stable for naphthalene my @vertices = $self->vertices; my( $position ) = grep { $self->graph->has_edge( $self->parent, $vertices[$_] ) } 0..$#vertices; die "unknown locant in multicyclic compound\n" unless defined $position; $name->append_substituent_locant( $self->locants( $position ) ); } $name .= 'yl'; return $name; } # FIXME: This is a bit strange: class and object method with the same name sub suffix() { my( $self ) = @_; return '' unless ref $self; if( $self->is_hydrocarbon ) { # FIXME: Check if aromatic, but with caution, as substitutions will break aromaticity my $cycle_sizes = join ',', map { $_->length } $self->cycles; if( exists $hydrocarbons_by_size{$cycle_sizes} ) { return ChemOnomatopist::Name::Part::Stem->new( $hydrocarbons_by_size{$cycle_sizes} )->to_name; } if( $cycle_sizes =~ /^(\d+),\1$/ ) { my $name = ChemOnomatopist::alkane_chain_name( $1 ) . 'alene'; return ChemOnomatopist::Name::Part::Stem->new( $name )->to_name; } } my @SMILES = map { $_->backbone_SMILES } $self->cycles; print STDERR "bicycle SMILES: @SMILES\n" if $ChemOnomatopist::DEBUG; my( $retained ) = grep { ($_->[0] eq $SMILES[0] && $_->[1] eq $SMILES[1]) || ($_->[0] eq $SMILES[1] && $_->[1] eq $SMILES[0]) } @names; return ChemOnomatopist::Name::Part::Stem->new( $retained->[2] )->to_name if $retained; if( any { $_->is_benzene } $self->cycles ) { my( $other ) = grep { !$_->is_benzene } $self->cycles; $other = ChemOnomatopist::Chain::Monocycle->new( $other->graph, $other->vertices ); my $SMILES = $other->backbone_SMILES; if( $SMILES =~ /^C=C((?O|S|\[Se\]|\[Te\])C|C(?O|S|\[Se\]|\[Te\]))c:c$/ ) { # Names according to BBv2 P-25.2.1, Table 2.8, (23) and (24) my $element = $+{el}; my $name = ($1 =~ /^C/ ? '2H-1-' : '1H-2-') . 'benzo'; $element =~ s/[\[\]]//g; if( $element ne 'O' ) { $name .= $elements{$element}->{prefix}; $name =~ s/a$/o/; } return $name . 'pyran'; } else { my $name = ChemOnomatopist::Name->new( 'benzo' ); my $other_name = $other->suffix; if( $other_name->starts_with_locant ) { # Locants are moved to front unshift @$name, shift @$other_name; } $name->[-1] =~ s/o$// if $other_name->[0] =~ /^a/; $name .= $other_name; return $name; } } # Fusion naming according to BBv2 P-25.3.1.3 # Find the bridge vertices my @bridge = $self->{cycles}[0]->vertices; @bridge = @bridge[-2..-1]; # Find autosymmetric equivalents having the least locants for the bridge my @equiv_A = $self->{cycles}[0]->autosymmetric_equivalents; my @equiv_B = $self->{cycles}[1]->autosymmetric_equivalents; my $min_A = min map { $_->vertex_ids( @bridge ) } @equiv_A; @equiv_A = grep { min( $_->vertex_ids( @bridge ) ) == $min_A } @equiv_A; my $min_B = min map { $_->vertex_ids( @bridge ) } @equiv_B; @equiv_B = grep { min( $_->vertex_ids( @bridge ) ) == $min_B } @equiv_B; my $fusion = '['; if( @equiv_A > 1 || @equiv_B > 1 ) { # At least one of the rings has mirror symmetry ("flip-symmetric"), thus numeric order is ascending $fusion .= ($min_B+1) . ',' . ($min_B+2); } else { # Rings are rigid, thus numeric order has to be derived my @order_A = $equiv_A[0]->vertex_ids( @bridge ); my @order_B = $equiv_B[0]->vertex_ids( @bridge ); if( ($order_A[0] <=> $order_A[1]) == ($order_B[0] <=> $order_B[1]) ) { # Ring atoms are encountered in the same order in both of the rings $fusion .= ($min_B+1) . ',' . ($min_B+2); } else { # Ring atom orders differ $fusion .= ($min_B+2) . ',' . ($min_B+1); } } $fusion .= '-' . chr( 97 + $min_A ) . ']'; my $name = ChemOnomatopist::Name->new; my $graph = $self->graph; $name->append_locants( map { $_ . 'H' } $self->locants( $self->indicated_hydrogens ) ); my @ideal = map { ChemOnomatopist::Chain::Monocycle->new( $_->graph, $_->vertices ) } $self->cycles; my $name_A = $ideal[1]->name; $name_A =~ s/^\d+H-//; # TODO: Complete retained prefixes from BBv2 P-25.3.2.2.3 $name_A = 'fur' if $name_A eq 'furan'; $name_A = 'imidaz' if $name_A eq 'imidazole'; $name_A = 'pyrid' if $name_A eq 'pyridine'; $name_A = 'pyrimid' if $name_A eq 'pyrimidine'; $name_A = 'thien' if $name_A eq 'thiophene'; $name .= $name_A; unless( $name->[-1] =~ s/e$/o/ ) { # BBv2 P-25.3.2.2.2 $name->[-1] .= 'o'; } $name .= ChemOnomatopist::Name::Part::Fusion->new( $fusion ); my $name_B = $ideal[0]->name; $name_B->[0] =~ s/\d+H-//; $name .= $name_B; $name->bracket_numeric_locants; return $name; } sub rule_most_senior_heteroatom { my( @chains ) = @_; # This order is taken from BBv2 P-25.3.2.4 (a) and is different from order in %elements my @element_order = qw( N F Cl Br I O S Se Te P As Sb Bi Si Ge Sn Pb B Al Ga In Tl ); my %element_order = map { $element_order[$_] => $_ } 0..$#element_order; my( $max_value ) = sort { $element_order{$a} <=> $element_order{$b} } grep { exists $element_order{$_} } map { $_->heteroatoms } @chains; return @chains unless $max_value; return grep { any { $_ eq $max_value } $_->heteroatoms } @chains; } sub rule_greatest_variety_of_heteroatoms { my( @chains ) = @_; my( $max_value ) = reverse sort map { scalar uniq $_->heteroatoms } @chains; return @chains unless $max_value; return grep { scalar( uniq $_->heteroatoms ) == $max_value } @chains; } sub _adjust_vertices_to_cycles() { my( $self ) = @_; my @cycles = $self->cycles; $self->{vertices} = []; push @{$self->{vertices}}, $cycles[0]->vertices; pop @{$self->{vertices}}; push @{$self->{vertices}}, $cycles[1]->vertices; pop @{$self->{vertices}}; return $self; } 1; ChemOnomatopist-0.6.1/lib/ChemOnomatopist/Chain/Bicycle/000077500000000000000000000000001452012116100230735ustar00rootroot00000000000000ChemOnomatopist-0.6.1/lib/ChemOnomatopist/Chain/Bicycle/Purine.pm000066400000000000000000000012441452012116100246740ustar00rootroot00000000000000package ChemOnomatopist::Chain::Bicycle::Purine; use strict; use warnings; # ABSTRACT: Purine chain # VERSION use parent ChemOnomatopist::Chain::Bicycle::; sub new { my( $class, $graph, $pyrimidine, $imidazole ) = @_; my @vertices = ( @{$pyrimidine->{vertices}}[1..5], $pyrimidine->{vertices}[0], reverse @{$imidazole->{vertices}}[0..2] ); return bless { graph => $graph, cycles => [ $pyrimidine, $imidazole ], vertices => \@vertices }; } sub locants(@) { my $self = shift; return map { $_ + 1 } @_; } sub suffix() { return ChemOnomatopist::Name->new( 'purine' ) } 1; ChemOnomatopist-0.6.1/lib/ChemOnomatopist/Chain/Carboxamide.pm000066400000000000000000000023201452012116100242720ustar00rootroot00000000000000package ChemOnomatopist::Chain::Carboxamide; use strict; use warnings; # ABSTRACT: Carboxamide chain # VERSION use parent ChemOnomatopist::Chain::; use List::Util qw( first ); sub new { my( $class, $graph, $amide, $C, $chain ) = @_; $chain->parent( $C ); return bless { graph => $graph, chain => $chain, vertices => [ $amide, $C, $chain->vertices ] }, $class; } sub needs_heteroatom_locants() { return '' } sub needs_heteroatom_names() { return '' } sub locants(@) { my $self = shift; return map { $_ > 1 ? $_ - 1 : $_ ? '?' : 'N' } @_; } # FIXME: This is a source of possible failures sub prefix() { return 'benzamido' } sub suffix() { my( $self ) = @_; return 'benz' if $self->{chain}->is_benzene; my $suffix = $self->{chain}->suffix; if( !$self->{chain}->isa( ChemOnomatopist::Chain::Monocycle:: ) || $self->{chain}->needs_substituent_locants ) { my @vertices = $self->{chain}->vertices; my $locant = first { $self->graph->has_edge( $self->{vertices}[1], $vertices[$_] ) } 0..$#vertices; $suffix->append_locants( $locant + 1 ); } $suffix .= 'carbox'; return $suffix; } 1; ChemOnomatopist-0.6.1/lib/ChemOnomatopist/Chain/Circular.pm000066400000000000000000000235321452012116100236300ustar00rootroot00000000000000package ChemOnomatopist::Chain::Circular; use strict; use warnings; # ABSTRACT: Chain whose first and last members are connected # VERSION use ChemOnomatopist; use ChemOnomatopist::Chain; # FIXME: Not sure why it is needed use ChemOnomatopist::Elements qw( %elements ); use ChemOnomatopist::Chain::Monocycle; use ChemOnomatopist::Util::SMILES qw( cycle_SMILES ); use Chemistry::OpenSMILES qw( is_single_bond ); use List::Util qw( all any uniq ); use Scalar::Util qw( blessed ); use Set::Object qw( set ); use parent ChemOnomatopist::Chain::; sub new { my( $class, $graph, @vertices ) = @_; return bless { graph => $graph, vertices => \@vertices }, $class; } # Selecting the candidate with the lowest alphabetical order sub backbone_SMILES() { my( $self ) = @_; my @vertices = $self->vertices; my @candidates; for (0..$#vertices) { push @candidates, cycle_SMILES( $self->graph, @vertices ); push @vertices, shift @vertices; } @vertices = reverse @vertices; for (0..$#vertices) { push @candidates, cycle_SMILES( $self->graph, @vertices ); push @vertices, shift @vertices; } my( $SMILES ) = sort @candidates; return $SMILES; } # FIXME: What to do with furan and others? sub is_aromatic() { my( $self ) = @_; return 1 if all { $_ eq ':' } $self->bonds; return ''; } sub is_benzene() { my( $self ) = @_; return $self->is_aromatic && $self->is_homogeneous && $self->length == 6; } sub is_heterocycle() { my( $self ) = @_; return $self->number_of_heteroatoms > 0; } sub is_homogeneous() { my( $self ) = @_; if( any { blessed $_ } $self->vertices ) { die "cannot process cycles with groups as their members\n"; } my @elements = map { $_->{symbol} } $self->vertices; my @bonds = $self->bonds; my( $element ) = @elements; my( $bond ) = @bonds; return '' if any { $_ ne $element } @elements; return 1 if $self->is_aromatic; return '' if any { $_ ne $bond } @bonds; return 1; } sub name() { my( $self ) = @_; my $graph = $self->graph; my $SMILES = $self->backbone_SMILES; my %names = %ChemOnomatopist::Chain::Monocycle::names; # Check the preserved names return ChemOnomatopist::Name->new( $names{$SMILES} ) if exists $names{$SMILES}; # Check for aromatic notation if( $SMILES =~ /:/ ) { $SMILES =~ s/([a-z]):/'' . uc( $1 )/ge; $SMILES =~ s/\[([a-z]{1,2})\]:/'[' . uc( $1 ) . ']'/ge; for my $SMILES_for_name (keys %names) { next unless $SMILES_for_name =~ /=/; my $name = $names{$SMILES_for_name}; $SMILES_for_name =~ s/=//g; return ChemOnomatopist::Name->new( $name ) if $SMILES eq $SMILES_for_name; } } # Check for annulenes if( $self->is_hydrocarbon && $self->is_aromatic && $self->length =~ /^(4|6|8|10|12|14|16)$/ ) { return ChemOnomatopist::Name->new( 'cyclo' . ChemOnomatopist::IUPAC_numerical_multiplier( $self->length, 1 ) . ChemOnomatopist::IUPAC_numerical_multiplier( $self->length / 2, 1 ) . 'ene' ); } # Check for cycloalkanes if( $self->is_hydrocarbon ) { my $name = ChemOnomatopist::Name->new( 'cyclo' ); $name .= ChemOnomatopist::unbranched_chain_name( $self ); return $name; } if( $self->length >= 3 && $self->length <= 10 && any { $_->{symbol} =~ /^[cC]$/ } $self->vertices ) { # Hantzsch-Widman names (BBv2 P-22.2.2.1) # Collect the types of heteroatoms and their attachment positions my %heteroatoms; my @vertices = $self->vertices; for my $i (0..$#vertices) { next if ChemOnomatopist::is_element( $vertices[$i], 'C' ); my $symbol = ucfirst $vertices[$i]->{symbol}; $heteroatoms{$symbol} = [] unless $heteroatoms{$symbol}; push @{$heteroatoms{$symbol}}, $i; } my $least_senior_element; my @heteroatom_locants; for my $element (sort { $elements{$a}->{seniority} <=> $elements{$b}->{seniority} } keys %heteroatoms) { push @heteroatom_locants, @{$heteroatoms{$element}}; $least_senior_element = $element; } my $name = ChemOnomatopist::Name->new; unless( @heteroatom_locants == 1 || (scalar keys %heteroatoms == 1 && @heteroatom_locants == $self->length - 1) ) { # Locants are omitted according to BBv2 P-22.2.2.1.7 $name->append_locants( map { $_ + 1 } @heteroatom_locants ); } for my $element (sort { $elements{$a}->{seniority} <=> $elements{$b}->{seniority} } keys %heteroatoms) { if( @{$heteroatoms{$element}} > 1 ) { $name->append_multiplier( ChemOnomatopist::IUPAC_numerical_multiplier( scalar @{$heteroatoms{$element}} ) ); } $name->append_element( exists $elements{$element}->{HantzschWidman} ? $elements{$element}->{HantzschWidman} : $elements{$element}->{prefix} ); } $name->[-1] =~ s/a$//; if( $self->length <= 5 ) { my @stems = ( 'ir', 'et', 'ol' ); $name .= ChemOnomatopist::Name::Part::Stem->new( $stems[$self->length - 3] ); if( $self->is_saturated ) { $name .= $heteroatoms{N} ? 'idine' : 'ane'; } elsif( $self->length == 3 ) { $name .= $heteroatoms{N} ? 'ene' : 'ine'; } else { $name .= 'e'; } return $name; } elsif( $self->length == 6 ) { if( ($elements{$least_senior_element}->{seniority} >= 5 && $elements{$least_senior_element}->{seniority} <= 8) || $least_senior_element eq 'Bi' ) { $name .= $self->is_saturated ? 'ane' : 'ine'; } elsif( ($elements{$least_senior_element}->{seniority} >= 16 && $elements{$least_senior_element}->{seniority} <= 19) || $least_senior_element eq 'N' ) { $name .= $self->is_saturated ? 'inane' : 'ine'; } else { $name .= $self->is_saturated ? 'inane' : 'inine'; } return $name; } elsif( $self->length >= 7 ) { my @stems = ( 'ep', 'oc', 'on', 'ec' ); $name .= ChemOnomatopist::Name::Part::Stem->new( $stems[$self->length - 7] ); $name .= $self->is_saturated ? 'ane' : 'ine'; return $name; } } my $name = ChemOnomatopist::Name->new( 'cyclo' ); $name .= ChemOnomatopist::unbranched_chain_name( $self ); return $name; } sub needs_multiple_bond_locants() { my( $self ) = @_; return 1 if $self->number_of_multiple_bonds > 1; return 1 if $self->number_of_branches || $self->parent; return scalar( uniq map { $_->{symbol} } $self->vertices ) > 1; } sub needs_substituent_locants() { my( $self ) = @_; # BBv2 P-14.3.4.2 (c): monosubstituted homogeneous cycles do not need locants return '' if $self->is_homogeneous && $self->number_of_branches == 1; return '' if $self->is_homogeneous && $self->number_of_branches >= $self->max_valence - 1; return 1; } sub needs_suffix_locant() { my( $self ) = @_; return $self->needs_substituent_locants; } # sub branch_positions() # TODO: Maybe need to add 1 to all returned positions? sub bonds() { my( $self ) = @_; my @bonds = $self->SUPER::bonds; my $graph = $self->graph; my @vertices = $self->vertices; if( $graph->has_edge_attribute( $vertices[0], $vertices[-1], 'bond' ) ) { push @bonds, $graph->get_edge_attribute( $vertices[0], $vertices[-1], 'bond' ); } else { push @bonds, '-'; } return @bonds; } sub indicated_hydrogens() { my( $self ) = @_; my @positions; my @vertices = $self->vertices; my $graph = $self->graph; for my $i (0..$#vertices) { # Rough interpretation of BBv2 P-14.7.1 and P-22.2.2.1.4 next unless $vertices[$i]->{symbol} =~ /^[CN]$/i; next unless $graph->degree( $vertices[$i] ) == 2; next unless all { is_single_bond( $graph, $vertices[$i], $_ ) } @vertices; push @positions, $i; } return @positions; } # Implemented according to BBv2 P-25.3.3.1.1 sub locants(@) { my $self = shift; my @vertices = $self->vertices; my $graph = $self->graph->subgraph( @vertices ); return map { $_ + 1 } @_ if $graph->vertices == $graph->edges; my %locant_map; my $pos = 0; my $letter = 'a'; for my $i (0..$#vertices) { if( $graph->degree( $vertices[$i] ) == 2 || !ChemOnomatopist::is_element( $vertices[$i], 'C' ) ) { $pos++; $locant_map{$i} = $pos; $letter = 'a'; } else { $locant_map{$i} = $pos . $letter; $letter++; } } return map { $locant_map{$_} } @_; } # In aromatic systems Kekule bonds have to be calculated, otherwise seniority rules may fail. sub number_of_double_bonds() { my( $self ) = @_; return $self->SUPER::number_of_double_bonds unless $self->is_aromatic; return int( $self->length / 2 ); } sub number_of_multiple_bonds() { my( $self ) = @_; return $self->SUPER::number_of_multiple_bonds unless $self->is_aromatic; return $self->number_of_double_bonds; } sub _cmp_instances { my( $A, $B ) = @_; # BBv2 P-44.2.1 (a) if( $A->is_heterocycle <=> $B->is_heterocycle ) { return $B->is_heterocycle <=> $A->is_heterocycle; } # BBv2 P-44.2.1 (b) if( set( $A->heteroatoms )->has( 'N' ) <=> set( $B->heteroatoms )->has( 'N' ) ) { return set( $B->heteroatoms )->has( 'N' ) <=> set( $A->heteroatoms )->has( 'N' ); } # BBv2 P-44.2.1 (e) return $B->length <=> $A->length if $A->length <=> $B->length; # BBv2 P-44.2.1 (f) return scalar( $B->heteroatoms ) <=> scalar( $A->heteroatoms ); } 1; ChemOnomatopist-0.6.1/lib/ChemOnomatopist/Chain/Ether.pm000066400000000000000000000054641452012116100231370ustar00rootroot00000000000000package ChemOnomatopist::Chain::Ether; use strict; use warnings; # ABSTRACT: Ether chain # VERSION use parent ChemOnomatopist::Chain::; use ChemOnomatopist; use ChemOnomatopist::Group::Ether; use ChemOnomatopist::Name; use Scalar::Util qw( blessed ); sub new { my( $class, $graph, $parent, @vertices ) = @_; my $self = { vertices => \@vertices, graph => $graph }; $self->{parent} = $parent if $parent; return bless $self, $class; } sub needs_heteroatom_locants() { return '' } sub needs_heteroatom_names() { return '' } sub prefix() { my( $self ) = @_; my @vertices = $self->vertices; return 'oxy' if @vertices == 1; my( $cut_position ) = grep { blessed $vertices[$_] && $vertices[$_]->isa( ChemOnomatopist::Group::Ether:: ) } 0..$#vertices; if( $cut_position ) { my @chains = ( ChemOnomatopist::Chain->new( $self->graph, $self->parent, reverse @vertices[0..$cut_position-1] ), ChemOnomatopist::Chain->new( $self->graph, $self->parent, @vertices[$cut_position+1..$#vertices] ) ); my @prefixes = map { $_->prefix } @chains; if( $prefixes[0] =~ /ane$/ ) { pop @{$prefixes[0]}; pop @{$prefixes[0]}; } my $name = ChemOnomatopist::Name->new; $name->append_locants( $cut_position ); $name->append( $prefixes[0] ); $name->append( 'oxy' ); $name->append( $prefixes[1] ); return $name; } else { my $chain = ChemOnomatopist::Chain->new( $self->graph, @vertices ); my $name = $chain->prefix; $name =~ s/ane$//; return $name . 'oxy'; } } sub suffix() { my( $self ) = @_; my @vertices = $self->vertices; my( $cut_position ) = grep { blessed $vertices[$_] && $vertices[$_]->isa( ChemOnomatopist::Group::Ether:: ) } 0..$#vertices; my @chains = ( ChemOnomatopist::Chain->new( $self->graph, $self->parent, reverse @vertices[0..$cut_position-1] ), ChemOnomatopist::Chain->new( $self->graph, $self->parent, @vertices[$cut_position+1..$#vertices] ) ); @chains = reverse @chains if $chains[0]->length > $chains[1]->length; my $name = $chains[0]->prefix; pop @$name; # Dropping 'ane', in a rather dirty way pop @$name; $name .= 'oxy'; $name .= $chains[1]->suffix; return $name; } 1; ChemOnomatopist-0.6.1/lib/ChemOnomatopist/Chain/FromHalves.pm000066400000000000000000000071251452012116100241320ustar00rootroot00000000000000package ChemOnomatopist::Chain::FromHalves; use strict; use warnings; # ABSTRACT: Chain formed by two halves # VERSION use Chemistry::OpenSMILES qw( is_single_bond ); use List::Util qw( all sum sum0 ); use parent ChemOnomatopist::Chain::; sub new { my( $class, @halves ) = @_; # FIXME: This check might be too weak die "halves must belong to the same graph\n" if $halves[0]->graph ne $halves[1]->graph; return bless { halves => \@halves }, $class; } sub graph() { my( $self ) = @_; return $self->{halves}[0]->graph; } sub halves() { my( $self ) = @_; return @{$self->{halves}}; } sub branch_positions() { my( $self ) = @_; my @half0_positions = $self->{halves}[0]->branch_positions; my @half1_positions = $self->{halves}[1]->branch_positions; # If path parts start at the same atom, its attachments get duplicated @half1_positions = grep { $_ } @half1_positions unless $self->{halves}[0]{other_center}; return ( map { $self->{halves}[0]->length - $_ - 1 } reverse @half0_positions ), ( map { $self->{halves}[1]->length + $_ - !defined $self->{halves}[0]{other_center} } @half1_positions ); } # FIXME: Somewhy this fails '2,8-dioxa-4,5-dithia-11-selenadodecane' test in t/16_heteroatoms.t #~ sub heteroatom_positions() #~ { #~ my( $self ) = @_; #~ my @half0_positions = $self->{halves}[0]->heteroatom_positions; #~ my @half1_positions = $self->{halves}[1]->heteroatom_positions; #~ # If path parts start at the same atom, its attachments get duplicated #~ @half1_positions = grep { $_ } @half1_positions unless $self->{halves}[0]{other_center}; #~ return ( map { $self->{halves}[0]->length - $_ - 1 } reverse @half0_positions ), #~ ( map { $self->{halves}[1]->length + $_ - !defined $self->{halves}[0]{other_center} } @half1_positions ); #~ } sub most_senior_group_positions() { my( $self ) = @_; my @half0_positions = $self->{halves}[0]->most_senior_group_positions; my @half1_positions = $self->{halves}[1]->most_senior_group_positions; # If path parts start at the same atom, its attachments get duplicated @half1_positions = grep { $_ } @half1_positions unless $self->{halves}[0]{other_center}; return ( map { $self->{halves}[0]->length - $_ - 1 } reverse @half0_positions ), ( map { $self->{halves}[1]->length + $_ - !defined $self->{halves}[0]{other_center} } @half1_positions ); } sub bonds() { my( $self ) = @_; my @bonds = reverse $self->{halves}[0]->bonds; if( $self->{halves}[0]->{other_center} ) { my @centers = map { $_->{other_center} } $self->halves; if( $self->graph->has_edge_attribute( @centers, 'bond' ) ) { push @bonds, $self->graph->get_edge_attribute( @centers, 'bond' ); } else { push @bonds, '-'; } } push @bonds, $self->{halves}[1]->bonds; return @bonds; } sub locant_names() { my( $self ) = @_; return reverse( $self->{halves}[0]->locant_names ), $self->{halves}[1]->locant_names; } # Not sure why this has to be overriden sub number_of_branches() { my( $self ) = @_; return int sum0 map { $_->number_of_branches } $self->halves; } sub vertices() { my( $self ) = @_; my @A = $self->{halves}[0]->vertices; my @B = $self->{halves}[1]->vertices; # If there is only one center atom, it appears in both chains shift @B unless $self->{halves}[0]->{other_center}; my @vertices = ( reverse( @A ), @B ); # Otherwise scalar is returned sometimes return @vertices; } 1; ChemOnomatopist-0.6.1/lib/ChemOnomatopist/Chain/Imino.pm000066400000000000000000000023521452012116100231340ustar00rootroot00000000000000package ChemOnomatopist::Chain::Imino; use strict; use warnings; # ABSTRACT: Imino chain # VERSION use ChemOnomatopist; use ChemOnomatopist::Name; use List::Util qw( first ); use Scalar::Util qw( blessed ); sub AUTOLOAD { our $AUTOLOAD; my $call = $AUTOLOAD; $call =~ s/.*:://; return if $call eq 'DESTROY'; my $self = shift; return $self->{chain}->can( $call )->( $self->{chain}, @_ ); } sub new { my( $class, $graph, $chain, $amine ) = @_; return bless { graph => $graph, chain => $chain, amine => $amine }; } sub vertices() { my $self = shift; my @vertices = ( $self->{amine}, $self->{chain}->vertices ); return @vertices; } sub locants(@) { my $self = shift; return map { $_ ? $_ : 'N' } @_; } sub needs_substituent_locants() { return 1 } sub suffix() { my( $self ) = @_; my $suffix = $self->{chain}->suffix; return $suffix unless $self->{chain}->needs_suffix_locant; return $suffix if $self->{chain}->length == 2; # Ad-hoc fix for ethanamines my $neighbour = first { $self->graph->has_edge( $self->{amine}, $_ ) } $self->{chain}->vertices; return $suffix->append_locants( $self->{chain}->locants( $self->vertex_ids( $neighbour ) ) ); } 1; ChemOnomatopist-0.6.1/lib/ChemOnomatopist/Chain/Monocycle.pm000066400000000000000000000165551452012116100240230ustar00rootroot00000000000000package ChemOnomatopist::Chain::Monocycle; use strict; use warnings; # ABSTRACT: Monocyclic group # VERSION use parent ChemOnomatopist::Chain::Circular::; use ChemOnomatopist; use ChemOnomatopist::Elements qw( %elements ); use ChemOnomatopist::Group::Sulfinyl; use ChemOnomatopist::Group::Sulfonyl; use ChemOnomatopist::Name; use List::Util qw( all ); use Scalar::Util qw( blessed ); # From BBv2 P-22.2.1 our %names = ( CCNCO => '1,3-oxazolidine', CCCNO => '1,2-oxazolidine', CCCCN => 'pyrrolidine', CCCNN => 'pyrazolidine', CCNCN => 'imidazolidine', CCCCCN => 'piperidine', CCNCCN => 'piperazine', CCNCCO => 'morpholine', # 5-membered aromatic 'C=CC=CO' => 'furan', 'C=COc:c' => 'furan', # For fused rings 'C=CN=CN' => '1H-imidazole', # FIXME: Adjust for isomerism 'C=CN=CO' => '1,3-oxazole', 'C=CC=NO' => '1,2-oxazole', 'C=CC=NN' => 'pyrazole', # FIXME: Adjust for isomerism 'C=CC=CN' => '1H-pyrrole', # FIXME: Adjust for isomerism 'C=CNc:c' => '1H-pyrrole', # For fused rings 'C=CC=C[Se]' => 'selenophene', 'C=CC=C[Te]' => 'tellurophene', 'C=CC=CS' => 'thiophene', 'C=CSc:c' => 'thiophene', # For fused rings # 6-membered aromatic 'c:c:c:c:c:c:' => 'benzene', 'C=CC=CCO' => '2H-pyran', # FIXME: Adjust for isomerism 'c:c:n:c:c:n:' => 'pyrazine', 'c:c:c:c:n:n:' => 'pyridazine', 'c:c:c:c:c:n:' => 'pyridine', 'c:c:c:n:c:n:' => 'pyrimidine', ); sub new { my( $class, $graph, @vertices ) = @_; my $self = bless { graph => $graph, vertices => \@vertices }, $class; return $self if $self->is_homogeneous; ( $self ) = $self->autosymmetric_equivalents; return $self; } # FIXME: For now we generate all possible traversals of the same cycle. # This is not optimal, some caching could be introduced. sub candidates() { my( $self ) = @_; my $graph = $self->graph; my @vertices = $self->vertices; my @chains; for (0..$#vertices) { push @chains, ChemOnomatopist::Chain::Monocycle->new( $graph, @vertices ); push @vertices, shift @vertices; } @vertices = reverse @vertices; for (0..$#vertices) { push @chains, ChemOnomatopist::Chain::Monocycle->new( $graph, @vertices ); push @vertices, shift @vertices; } for (@chains) { $_->{candidate_for} = $self; } return @chains; } sub autosymmetric_equivalents() { my( $self ) = @_; my @vertices = $self->vertices; my $cycle = $self->graph->subgraph( \@vertices ); # TODO: Add attributes my @chains; for (0..$#vertices) { push @chains, ChemOnomatopist::Chain::Circular->new( $cycle, @vertices ); push @vertices, shift @vertices; } @vertices = reverse @vertices; for (0..$#vertices) { push @chains, ChemOnomatopist::Chain::Circular->new( $cycle, @vertices ); push @vertices, shift @vertices; } # CHECKME: Additional rules from ChemOnomatopist::filter_chains() might still be needed @chains = sort { ChemOnomatopist::Chain::Monocycle::_cmp( $a, $b ) } @chains; @chains = grep { !ChemOnomatopist::Chain::Monocycle::_cmp( $_, $chains[0] ) } @chains; @chains = map { bless { graph => $self->graph, vertices => [ $_->vertices ] } } @chains; return ChemOnomatopist::rule_lowest_numbered_locants( @chains ); } sub parent(;$) { my( $self, $parent ) = @_; my $old_parent = $self->SUPER::parent( $parent ); return $old_parent unless $parent; return $old_parent if $old_parent && $parent == $old_parent; # Addition of parent to homogeneous cycles settles the otherwise ambiguous order # TODO: Other autosymmetric monocycles can possibly as well be settled if( $self->is_homogeneous ) { my @vertices = $self->vertices; my( $position ) = grep { $self->graph->has_edge( $vertices[$_], $parent ) } 0..$#vertices; if( defined $position ) { my @chains = ( ChemOnomatopist::Chain::Circular->new( $self->graph, @vertices[$position..$#vertices], @vertices[0..$position-1] ) ); @vertices = reverse @vertices; $position = $#vertices - $position; push @chains, ChemOnomatopist::Chain::Circular->new( $self->graph, @vertices[$position..$#vertices], @vertices[0..$position-1] ); for (@chains) { $_->{parent} = $parent; } my( $chain ) = ChemOnomatopist::filter_chains( @chains ); $self->{vertices} = [ $chain->vertices ]; } } return $old_parent; } sub needs_heteroatom_locants() { my( $self ) = @_; return $self->length < 3 || $self->length > 10 || all { $_->{symbol} !~ /^[cC]$/ } $self->vertices; } sub needs_heteroatom_names() { my( $self ) = @_; return $self->needs_heteroatom_locants; } sub prefix() { my( $self ) = @_; my $parent = $self->parent; my $name = $self->suffix; if( $name eq 'benzene' ) { if( $parent && blessed $parent && ( $parent->isa( ChemOnomatopist::Group::Sulfinyl:: ) || $parent->isa( ChemOnomatopist::Group::Sulfonyl:: ) ) ) { # Rule derived from examples in BBv2 P-63.6 return $name; } return 'phenyl'; } $name = ChemOnomatopist::Name->new( $name ) unless blessed $name; $name->{name}[-1] =~ s/e$//; pop @$name if $name->{name}[-1] eq ''; pop @$name if $name->ends_with_alkane_an_suffix; if( $parent && !$self->is_homogeneous ) { # FIXME: Order of vertices seems to be established regardless of the attachments. # This causes ambiguity, for example, in 1-(1,4-diazepan-1-ylsulfonyl)-8-methylisoquinoline my @vertices = $self->vertices; my( $position ) = grep { $self->graph->has_edge( $parent, $vertices[$_] ) } 0..$#vertices; die "unknown locant in multicyclic compound\n" unless defined $position; $name->append_substituent_locant( $self->locants( $position ) ); } $name .= 'yl'; return $name; } # FIXME: This is a bit strange: class and object method with the same name sub suffix() { my( $self ) = @_; return '' unless ref $self; my $name = $self->name; return blessed $name ? $name : ChemOnomatopist::Name->new( $name ); } # FIXME: Pay attention to bond orders sub _cmp { my( $A, $B ) = @_; my @A_heteroatoms = $A->heteroatoms; my @A_positions = $A->heteroatom_positions; @A_positions = map { $A_positions[$_] } sort { $elements{$A_heteroatoms[$a]}->{seniority} <=> $elements{$A_heteroatoms[$b]}->{seniority} } 0..$#A_positions; my @B_heteroatoms = $B->heteroatoms; my @B_positions = $B->heteroatom_positions; @B_positions = map { $B_positions[$_] } sort { $elements{$B_heteroatoms[$a]}->{seniority} <=> $elements{$B_heteroatoms[$b]}->{seniority} } 0..$#B_positions; return ChemOnomatopist::cmp_arrays( \@A_positions, \@B_positions ) if ChemOnomatopist::cmp_arrays( \@A_positions, \@B_positions ); return ChemOnomatopist::cmp_arrays( [ $A->multiple_bond_positions ], [ $B->multiple_bond_positions ] ); } 1; ChemOnomatopist-0.6.1/lib/ChemOnomatopist/Chain/Monocycle/000077500000000000000000000000001452012116100234515ustar00rootroot00000000000000ChemOnomatopist-0.6.1/lib/ChemOnomatopist/Chain/Monocycle/Fused.pm000066400000000000000000000025451452012116100250630ustar00rootroot00000000000000package ChemOnomatopist::Chain::Monocycle::Fused; use strict; use warnings; # ABSTRACT: Monocyclic group which is fused to some other cycle # VERSION use parent ChemOnomatopist::Chain::Monocycle::; use ChemOnomatopist; use ChemOnomatopist::Util::SMILES qw( cycle_SMILES ); use List::Util qw( all ); sub new { my( $class, $graph, $system, @vertices ) = @_; return bless { graph => $graph, system => $system, vertices => \@vertices }, $class; } sub system() { my( $self ) = @_; return $self->{system}; } sub backbone_SMILES() { my( $self ) = @_; return cycle_SMILES( $self->graph, $self->vertices ); } # Returns a copy of the monocycle flipped around the bridge sub flipped() { my( $self ) = @_; my @vertices = $self->vertices; my @bridge = splice @vertices, -2; my @flipped = ( reverse( @vertices ), reverse( @bridge ) ); return ChemOnomatopist::Chain::Monocycle::Fused->new( $self->graph, $self->system, @flipped ); } # Find the best orientation between self and flipped self. # Return 1 if flipped. sub orient() { my( $self ) = @_; my( $chain ) = ChemOnomatopist::filter_chains( $self, $self->flipped ); return '' if $chain == $self; $self->{vertices} = $chain->{vertices}; return 1; } 1; ChemOnomatopist-0.6.1/lib/ChemOnomatopist/Chain/Monospiro.pm000066400000000000000000000077431452012116100240570ustar00rootroot00000000000000package ChemOnomatopist::Chain::Monospiro; use strict; use warnings; # ABSTRACT: Monospiro compound # VERSION use parent ChemOnomatopist::Chain::Circular::; use ChemOnomatopist; use Graph::Traversal::DFS; sub new { my( $class, $graph, @vertices ) = @_; my $subgraph = $graph->subgraph( \@vertices ); my( $spiro_atom ) = grep { $subgraph->degree( $_ ) == 4 } @vertices; $subgraph->delete_vertex( $spiro_atom ); # Graph is broken into components. # Each component is represented as an array of vertices in the order of traverse. my @components; for my $component (sort { @$a <=> @$b } $subgraph->connected_components) { my( $start ) = sort { $subgraph->degree( $a ) <=> $subgraph->degree( $b ) } @$component; push @components, [ Graph::Traversal::DFS->new( $subgraph, start => $start )->dfs ]; } return bless { graph => $graph, spiro_atom => $spiro_atom, components => \@components }, $class; } sub candidates() { my( $self ) = @_; # "Numbering starts in the smaller ring, if one is smaller, at a ring atom next to the spiro atom and proceeds first around that ring, then through the spiro atom and around the second ring." my $graph = $self->graph; my $spiro_atom = $self->{spiro_atom}; my( $A, $B ) = $self->components; my @candidates; push @candidates, $self, bless( { graph => $graph, spiro_atom => $spiro_atom, components => [ $A, [ reverse @$B ] ], candidate_for => $self } ), bless( { graph => $graph, spiro_atom => $spiro_atom, components => [ [ reverse @$A ], $B ], candidate_for => $self } ), bless( { graph => $graph, spiro_atom => $spiro_atom, components => [ [ reverse @$A ], [ reverse @$B ] ], candidate_for => $self } ); if( @$A == @$B ) { push @candidates, bless( { graph => $graph, spiro_atom => $spiro_atom, components => [ $B, $A ], candidate_for => $self } ), bless( { graph => $graph, spiro_atom => $spiro_atom, components => [ $B, [ reverse @$A ] ], candidate_for => $self } ), bless( { graph => $graph, spiro_atom => $spiro_atom, components => [ [ reverse @$B ], $A ], candidate_for => $self } ), bless( { graph => $graph, spiro_atom => $spiro_atom, components => [ [ reverse @$B ], [ reverse @$A ] ], candidate_for => $self } ); } return @candidates; } sub components() { my( $self ) = @_; return @{$self->{components}}; } sub vertices() { my( $self ) = @_; my( $A, $B ) = $self->components; my @vertices = ( @$A, $self->{spiro_atom}, @$B ); return @vertices; } sub has_form($$) { my( $class, $graph ) = @_; my %degrees = map { $graph->degree( $_ ) => 1 } $graph->vertices; return '' unless join( ',', sort keys %degrees ) eq '2,4'; my @d4 = grep { $graph->degree( $_ ) == 4 } $graph->vertices; return '' unless @d4 == 1; return 1; } sub locants() { shift; return map { $_ + 1 } @_ } sub prefix() { my( $self ) = @_; my $name = ChemOnomatopist::Name->new( 'spiro' ); $name .= ChemOnomatopist::Name::Part::Fusion->new( '[' . join( '.', map { scalar @$_ } $self->components ) . ']' ); $name .= ChemOnomatopist::alkane_chain_name( $self->length ) . 'an'; if( $self->parent ) { my @vertices = $self->vertices; my( $position ) = grep { $self->graph->has_edge( $self->parent, $vertices[$_] ) } 0..$#vertices; die "unknown locant in multicyclic compound\n" unless defined $position; $name->append_substituent_locant( $self->locants( $position ) ); } $name .= 'yl'; return $name; } # FIXME: This is a bit strange: class and object method with the same name sub suffix(@) { my( $self ) = @_; return '' unless ref $self; my $name = ChemOnomatopist::Name->new( 'spiro' ); $name .= ChemOnomatopist::Name::Part::Fusion->new( '[' . join( '.', map { scalar @$_ } $self->components ) . ']' ); $name .= ChemOnomatopist::unbranched_chain_name( $self ); return $name; } 1; ChemOnomatopist-0.6.1/lib/ChemOnomatopist/Chain/Phenanthrene.pm000066400000000000000000000065641452012116100245110ustar00rootroot00000000000000package ChemOnomatopist::Chain::Phenanthrene; use strict; use warnings; # ABSTRACT: Phenanthrene or its derivative # VERSION use ChemOnomatopist::Chain::Polyaphene; use ChemOnomatopist::Util::Graph qw( merge_graphs ); use Graph::Undirected; use List::Util qw( first all any ); use parent ChemOnomatopist::Chain::Polyaphene::; sub new { my( $class, $graph, @cycles ) = @_; my $subgraph = $graph->subgraph( map { $_->vertices } @cycles ); # Deleting all edges having degree 3 vertices at both ends $subgraph->delete_edges( map { @$_ } grep { $subgraph->degree( $_->[0] ) == 3 && $subgraph->degree( $_->[1] ) == 3 } $subgraph->edges ); # Find an order my( $start ) = grep { $subgraph->degree( $_ ) == 1 } $subgraph->vertices; my @vertices = Graph::Traversal::DFS->new( $subgraph, start => $start )->dfs; # Adjust the order if( any { ChemOnomatopist::element( $_ ) eq 'N' } @vertices ) { # Find the order so as N is closest to the begining of the chain # CHECKME: This might not be correct due to offset my $first = first { ChemOnomatopist::element( $vertices[$_] ) eq 'N' } 0..$#vertices; my $last = first { ChemOnomatopist::element( $vertices[-1-$_] ) eq 'N' } 0..$#vertices; @vertices = reverse @vertices if $last < $first; push @vertices, shift @vertices; # Phenanthridine has a strict order if( (grep { ChemOnomatopist::element( $_ ) eq 'N' } @vertices) == 1 && ChemOnomatopist::element( $vertices[5] ) ne 'N' ) { die "cannot handle complicated cyclic compounds\n"; } } else { for (1..5) { push @vertices, shift @vertices; } @vertices = reverse @vertices; } return bless { graph => $graph, vertices => \@vertices }, $class; } sub candidates() { my( $self ) = @_; my @candidates = ( $self ); if( $self->is_hydrocarbon ) { my @vertices = $self->vertices; push @candidates, bless { graph => $self->graph, vertices => [ reverse map { $vertices[$_] } ( 10..13, 0..9 ) ], candidate_for => $self }; } return @candidates; } sub ideal_graph($) { my( $class ) = @_; my @graphs; for (0..1) { my $graph = Graph::Undirected->new( refvertexed => 1 ); $graph->add_cycle( map { { symbol => 'C', number => $_-1 } } 1..6 ); push @graphs, $graph; } my $graph = merge_graphs( @graphs ); # Pick an edge from each graph my( $A ) = $graphs[0]->edges; my( $B ) = $graphs[1]->edges; # Join a pair of atoms with an edge $graph->add_edge( $A->[0], $B->[0] ); # Add a longer arc between other two atoms $graph->add_path( $A->[1], { symbol => 'C' }, { symbol => 'C' }, $B->[1] ); return $graph; } sub needs_heteroatom_locants() { my( $self ) = @_; return $self->number_of_heteroatoms == 2; } sub needs_heteroatom_names() { return '' } sub prefix() { my( $self ) = @_; if( all { $_ eq 'N' } $self->heteroatoms ) { return 'phenanthridine' if $self->number_of_heteroatoms == 1; return 'phenanthroline' if $self->number_of_heteroatoms == 2; } return 'phenanthrene'; } sub suffix() { return $_[0]->prefix } 1; ChemOnomatopist-0.6.1/lib/ChemOnomatopist/Chain/Polyacene.pm000066400000000000000000000066041452012116100240040ustar00rootroot00000000000000package ChemOnomatopist::Chain::Polyacene; use strict; use warnings; # ABSTRACT: Polyacenes, including anthracene # VERSION use parent ChemOnomatopist::Chain::Circular::; use ChemOnomatopist::Util::Graph qw( graph_without_edge_attributes subgraph ); use Graph::Nauty qw( are_isomorphic ); use Graph::Undirected; use Set::Object qw( set ); sub new { my( $class, $graph, @cycles ) = @_; my @vertices = map { $_->vertices } @cycles; my $subgraph = subgraph( $graph, @vertices ); # Identify the triple-connected vertices my $d3 = set( grep { $subgraph->degree( $_ ) == 3 } $subgraph->vertices ); # Remove them and intermediate atoms $subgraph->delete_vertices( @$d3, grep { set( $subgraph->neighbours( $_ ) ) <= $d3 } $subgraph->vertices ); # Find the first vertex my( $start ) = grep { $subgraph->degree( $_ ) == 1 } $subgraph->vertices; $subgraph = subgraph( $graph, @vertices ); # Restore the subgraph # Delete the edge which closes the all-encompassing cycle $subgraph->delete_edge( $start, grep { $subgraph->degree( $_ ) == 3 } $subgraph->neighbours( $start ) ); $subgraph->delete_edges( map { @$_ } grep { $subgraph->degree( $_->[0] ) == 3 && $subgraph->degree( $_->[1] ) == 3 } $subgraph->edges ); @vertices = reverse( Graph::Traversal::DFS->new( $subgraph, start => $start )->dfs ); return bless { graph => $graph, vertices => \@vertices }, $class; } sub candidates() { my( $self ) = @_; my @candidates = ( $self ); my $subgraph = subgraph( $self->graph, $self->vertices ); my @chords = grep { $subgraph->degree( $_->[0] ) == 3 && $subgraph->degree( $_->[1] ) == 3 } $subgraph->edges; $subgraph->delete_edges( $self->{vertices}[3], $self->{vertices}[4], map { @$_ } @chords ); push @candidates, bless { graph => $self->graph, vertices => [ reverse Graph::Traversal::DFS->new( $subgraph, start => $self->{vertices}[3] )->dfs ], candidate_for => $self }; return @candidates; } sub needs_substituent_locants() { return 1 } sub has_form($$) { my( $class, $graph ) = @_; my %degrees = map { $graph->degree( $_ ) => 1 } $graph->vertices; return '' unless join( ',', sort keys %degrees ) eq '2,3'; my $N = scalar $graph->vertices; return '' if $N < 14; return '' if ($N - 14) % 4; return are_isomorphic( graph_without_edge_attributes( $graph ), $class->ideal_graph( $N ), sub { return 'C' } ); } sub ideal_graph($$) { my( $class, $N ) = @_; die "cannot construct polyacene with $N vertices\n" if $N < 10 || ($N-2) % 4; my $graph = Graph::Undirected->new( refvertexed => 1 ); my @vertices = map { { symbol => 'C', number => $_-1 } } 1..$N; $graph->add_cycle( @vertices ); for (0..($N-6) / 4 -1) { $graph->add_edge( map { $vertices[$_] } ( 4 + 2*$_, $N - 1 - 2*$_ ) ); } return $graph; } sub suffix { my( $self ) = @_; return 'anthracene' if $self->length == 14; return ChemOnomatopist::IUPAC_numerical_multiplier( ($self->length - 2) / 4 ) . 'acene'; } 1; ChemOnomatopist-0.6.1/lib/ChemOnomatopist/Chain/Polyaphene.pm000066400000000000000000000141471452012116100241720ustar00rootroot00000000000000package ChemOnomatopist::Chain::Polyaphene; use strict; use warnings; # ABSTRACT: Polyaphenes, including phenanthrene # VERSION use parent ChemOnomatopist::Chain::Circular::; use ChemOnomatopist::Util::Graph qw( graph_without_edge_attributes merge_graphs subgraph ); use Graph::Nauty qw( are_isomorphic ); use Graph::Undirected; use Set::Object qw( set ); sub new { my( $class, $graph, @cycles ) = @_; my $subgraph = subgraph( $graph, map { $_->vertices } @cycles ); # Constructing the connectivity graph for cycles my $connectivity_graph = Graph::Undirected->new( refvertexed => 1 ); for my $vertex ($subgraph->vertices) { next unless $subgraph->degree( $vertex ) == 3; $connectivity_graph->add_edge( grep { set( $_->vertices )->has( $vertex ) } @cycles ); } # Detecting the common ring which has three edges having degree 3 at their ends my $common_ring; for my $cycle (@cycles) { next unless scalar( grep { $subgraph->degree( $_->[0] ) == 3 && $subgraph->degree( $_->[1] ) == 3 } subgraph( $graph, $cycle->vertices )->edges ) == 3; $common_ring = $cycle; } # Finding the correct order of cycles, flipping if needed my $common_ring_pos = @cycles % 2 ? (@cycles - 1) / 2 : @cycles / 2 - 1; my( $start ) = grep { $connectivity_graph->degree( $_ ) == 1 } @cycles; my @cycles_in_order = Graph::Traversal::DFS->new( $connectivity_graph, start => $start )->dfs; if( !@cycles % 2 && $cycles_in_order[$common_ring_pos] != $common_ring ) { @cycles_in_order = reverse @cycles_in_order; } # Finding the atom in the common ring which will get the lowest number $subgraph = subgraph( $graph, $common_ring->vertices ); $subgraph->delete_edges( map { @$_ } map { subgraph( $graph, $_->vertices )->edges } ( $cycles_in_order[$common_ring_pos-1], $cycles_in_order[$common_ring_pos+1] ) ); my( $short_edge ) = grep { $subgraph->degree( $_->[0] ) == 1 && $subgraph->degree( $_->[1] ) == 1 } $subgraph->edges; my( $junction ) = (set( $cycles_in_order[$common_ring_pos-1]->vertices ) * set( @$short_edge ))->members; # Finding the candidates of the starting atom $subgraph = subgraph( $graph, $cycles_in_order[0]->vertices ); $subgraph->delete_vertices( $cycles_in_order[1]->vertices ); my @candidates = grep { $subgraph->degree( $_ ) == 1 } $subgraph->vertices; # Finding the first and the last atom in the enumeration order $subgraph = subgraph( $graph, map { $_->vertices } @cycles ); my $shortest_paths = $subgraph->single_source_shortest_paths( $junction ); my $min_length; my $first; for my $vertex (@candidates) { my $length = 0; my $v = $vertex; while( $shortest_paths->has_vertex_attribute( $v, 'p' ) ) { $v = $shortest_paths->get_vertex_attribute( $v, 'p' ); $length++; } if( !defined $min_length || $min_length > $length ) { $min_length = $length; $first = $vertex; } } my( $last ) = grep { $subgraph->degree( $_ ) == 3 } $subgraph->neighbours( $first ); # Deleting chords and connection between first and last atoms $subgraph->delete_edges( map { (set( $cycles_in_order[$_ ]->vertices ) * set( $cycles_in_order[$_+1]->vertices ))->members } 0..$#cycles-1 ); $subgraph->delete_edge( $first, $last ); my @vertices = Graph::Traversal::DFS->new( $subgraph, start => $last )->dfs; return bless { graph => $graph, vertices => \@vertices }, $class; } sub candidates() { my( $self ) = @_; my @candidates = ( $self ); if( (($self->length - 6) / 4) % 2 == 0 ) { my @vertices = reverse $self->vertices; my $N = ($self->length - 6) / 4 / 2; # Number of rings in one "line" for (1..($N-1)*4+2) { push @vertices, shift @vertices; } push @candidates, bless { graph => $self->graph, vertices => \@vertices, candidate_for => $self }; } return @candidates; } sub needs_substituent_locants() { return 1 } sub has_form($$) { my( $class, $graph ) = @_; my %degrees = map { $graph->degree( $_ ) => 1 } $graph->vertices; return '' unless join( ',', sort keys %degrees ) eq '2,3'; my $N = scalar $graph->vertices; return '' if $N < 6 + 3 * 4; return '' if ($N - 6) % 4; return are_isomorphic( graph_without_edge_attributes( $graph ), $class->ideal_graph( $N ), sub { return 'C' } ); } sub ideal_graph($$) { my( $class, $N ) = @_; my @sizes; if( ($N - 6) / 4 % 2 ) { # Two unequal branches @sizes = map { ( ( ($N - 6) / 4 - 1 ) / 2 + $_ ) * 4 + 2 } (0, 1); } else { # Two equal-sized branches @sizes = ( ($N - 2)/2, ($N - 2)/2 ); } my @graphs = map { ChemOnomatopist::Chain::Polyacene->ideal_graph( $_ ) } @sizes; my $graph = merge_graphs( @graphs ); # Locate the terminal edges my @termini; for my $g (@graphs) { $g->delete_vertices( map { $g->neighbours( $_ ) } grep { $g->degree( $_ ) == 3 } $g->vertices ); $g->delete_vertices( grep { $g->degree( $_ ) == 3 } $g->vertices ); my( $terminus ) = $g->edges; push @termini, $terminus; } # Create the common ring (BBv2 P-25.1.2.2) $graph->add_edge( map { $termini[$_]->[0] } (0, 1) ); $graph->add_path( $termini[0]->[1], { symbol => 'C' }, { symbol => 'C' }, $termini[1]->[1] ); return $graph; } sub prefix() { my( $self ) = @_; return ChemOnomatopist::IUPAC_numerical_multiplier( ($self->length - 2) / 4 ) . 'aphene'; } sub suffix() { return $_[0]->prefix } 1; ChemOnomatopist-0.6.1/lib/ChemOnomatopist/Chain/Porphyrin.pm000066400000000000000000000026001452012116100240470ustar00rootroot00000000000000package ChemOnomatopist::Chain::Porphyrin; # ABSTRACT: Porphyrin compound # VERSION use strict; use warnings; use parent ChemOnomatopist::Chain::Circular::; use ChemOnomatopist::Util::Graph qw( graph_without_edge_attributes ); use Graph::Nauty qw( are_isomorphic ); use Graph::Undirected; sub new { my( $class, $graph, @vertices ) = @_; return bless { graph => $graph, vertices => \@vertices }, $class; } sub needs_heteroatom_locants() { return '' } sub needs_heteroatom_names() { return '' } sub has_form($$) { my( $class, $graph ) = @_; my @vertices = $graph->vertices; return '' unless @vertices == 24; return '' unless (grep { ChemOnomatopist::is_element( $_, 'C' ) } @vertices) == 20; return '' unless (grep { ChemOnomatopist::is_element( $_, 'N' ) } @vertices) == 4; return are_isomorphic( graph_without_edge_attributes( $graph ), $class->ideal_graph, sub { ChemOnomatopist::element( $_[0] ) } ); } sub ideal_graph($) { my( $class ) = @_; my $graph = Graph::Undirected->new( refvertexed => 1 ); my @vertices = map { { symbol => 'C' } } 1..20; $graph->add_cycle( @vertices ); for (0..3) { $graph->add_path( $vertices[$_ * 5], { symbol => 'N' }, $vertices[$_ * 5 + 3] ); } return $graph; } sub prefix() { return 'porphyrin' } sub suffix() { return 'porphyrin' } 1; ChemOnomatopist-0.6.1/lib/ChemOnomatopist/Chain/Xanthene.pm000066400000000000000000000133061452012116100236340ustar00rootroot00000000000000package ChemOnomatopist::Chain::Xanthene; use strict; use warnings; # ABSTRACT: Xanthene or its close derivative # VERSION use parent ChemOnomatopist::Chain::Circular::; use ChemOnomatopist::Elements qw( %elements ); use ChemOnomatopist::Util::Graph qw( subgraph ); use List::Util qw( any uniq ); sub new { my( $class, $graph, @cycles ) = @_; my @benzenes = grep { $_->is_benzene } @cycles; my( $other ) = grep { !$_->is_benzene } @cycles; # Find the correct vertex order my $subgraph = subgraph( $graph, map { $_->vertices } @cycles ); my @bridges = grep { $subgraph->degree( $_->[0] ) == 3 && $subgraph->degree( $_->[1] ) == 3 } $subgraph->edges; $subgraph->delete_edges( map { @$_ } @bridges ); my @heteroatom_positions = $other->heteroatom_positions; my @heteroatoms = $other->heteroatoms; if( uniq( @heteroatoms ) == 2 ) { @heteroatom_positions = reverse @heteroatom_positions if $heteroatoms[0] eq 'N'; @heteroatom_positions = reverse @heteroatom_positions if $heteroatoms[1] eq 'O'; @heteroatom_positions = reverse @heteroatom_positions if join( ',', @heteroatoms ) eq 'As,S'; } my @other_vertices = $other->vertices; $subgraph->delete_vertex( $other_vertices[($heteroatom_positions[0] + 2) % 6] ); my( $start ) = grep { $subgraph->has_vertex( $_ ) && $subgraph->degree( $_ ) == 1 } map { $_->vertices } @benzenes; my @vertices = ( reverse( Graph::Traversal::DFS->new( $subgraph, start => $start )->dfs ), $other_vertices[($heteroatom_positions[0] + 2) % 6] ); return bless { graph => $graph, vertices => \@vertices }, $class; } sub candidates() { my( $self ) = @_; my @candidates = ( $self ); my @vertices = $self->vertices; push @candidates, bless { graph => $self->graph, vertices => [ reverse @vertices[11..13], @vertices[0..10] ], candidate_for => $self }; if( $self->number_of_heteroatoms == 2 && uniq( $self->heteroatoms ) == 1 ) { # TODO: Add two more candidates } return @candidates; } sub locants(@) { my $self = shift; my @locant_map; if( $self->number_of_heteroatoms == 1 && join( '', $self->heteroatoms ) !~ /^(As|P)$/ ) { # Acridarsine and acridophosphine are numbered systematically @locant_map = ( 1..4, '4a', 10, '10a', 5..8, '8a', 9, '9a' ); } else { @locant_map = ( 1..4, '4a', 5, '5a', 6..9, '9a', 10, '10a' ); } return map { $locant_map[$_] } @_; } sub ideal_graph() { return ChemOnomatopist::Chain::Polyacene->ideal_graph( 14 ); } sub needs_heteroatom_locants() { return '' } sub needs_heteroatom_names() { return '' } sub needs_substituent_locants() { return 1 } sub prefix() { my( $self ) = @_; my $name = $self->suffix; $name->[-1]{value} =~ s/e$//; if( $self->parent ) { my @vertices = $self->vertices; my( $position ) = grep { $self->graph->has_edge( $self->parent, $vertices[$_] ) } 0..$#vertices; die "unknown locant in multicyclic compound\n" unless defined $position; $name->append_substituent_locant( $self->locants( $position ) ); } $name .= 'yl'; return $name; } sub suffix() { my( $self ) = @_; my @heteroatoms = $self->heteroatoms; if( @heteroatoms == 1 ) { if( $heteroatoms[0] eq 'N' ) { return ChemOnomatopist::Name::Part::Stem->new( 'acridine' )->to_name; } elsif( $heteroatoms[0] eq 'As' ) { return ChemOnomatopist::Name::Part::Stem->new( 'acridarsine' )->to_name; } elsif( $heteroatoms[0] eq 'P' ) { return ChemOnomatopist::Name::Part::Stem->new( 'acridophosphine' )->to_name; } my $name = ChemOnomatopist::Name::Part::Locants->new( '9H-' )->to_name; my $stem = ''; if( $heteroatoms[0] ne 'O' ) { $stem .= $elements{$heteroatoms[0]}->{prefix}; $stem =~ s/a$/o/; } $stem .= 'xanthene'; return $name->append_stem( $stem ); } elsif( @heteroatoms == 2 && $heteroatoms[0] eq $heteroatoms[1] && $heteroatoms[0] eq 'N' ) { return ChemOnomatopist::Name::Part::Stem->new( 'phenazine' )->to_name; } elsif( @heteroatoms == 2 && $heteroatoms[0] eq $heteroatoms[1] ) { my $name = $elements{$heteroatoms[0]}->{prefix}; $name =~ s/a$//; return ChemOnomatopist::Name::Part::Stem->new( $name . 'anthrene' )->to_name; } elsif( @heteroatoms == 2 && $heteroatoms[1] eq 'N' ) { # BBv2 P-25.2.2.3 my $name = ChemOnomatopist::Name::Part::Locants->new( '10H-' )->to_name; my $stem = 'pheno'; $stem =~ s/o$// if $elements{$heteroatoms[0]}->{prefix} =~ /^o/; $name->append_stem( $stem . $elements{$heteroatoms[0]}->{prefix} . 'zine' ); return $name; } elsif( @heteroatoms == 2 && $heteroatoms[0] eq 'O' ) { # BBv2 P-25.2.2.3 return 'phenoxathiine' if $heteroatoms[1] eq 'S'; if( any { $heteroatoms[1] eq $_ } qw( Se Te ) ) { my $stem = 'phenoxa' . $elements{$heteroatoms[1]}->{prefix}; $stem =~ s/a$/ine/; return ChemOnomatopist::Name::Part::Stem->new( $stem )->to_name; } elsif( any { $heteroatoms[1] eq $_ } qw( P As Sb ) ) { my $stem = 'phenoxa' . $elements{$heteroatoms[1]}->{prefix}; $stem =~ s/a$/inine/; return ChemOnomatopist::Name::Part::Stem->new( $stem )->to_name; } } elsif( @heteroatoms == 2 && $heteroatoms[0] eq 'S' && $heteroatoms[1] eq 'As' ) { # BBv2 P-25.2.2.3 return ChemOnomatopist::Name::Part::Stem->new( 'phenothiarsinine' )->to_name; } die "cannot name xanthene derivative\n"; } 1; ChemOnomatopist-0.6.1/lib/ChemOnomatopist/ChainHalf.pm000066400000000000000000000025261452012116100226570ustar00rootroot00000000000000package ChemOnomatopist::ChainHalf; use strict; use warnings; use ChemOnomatopist::Chain; use parent ChemOnomatopist::Chain::; # ABSTRACT: Half of a longest chain # VERSION sub new { my( $class, $graph, $other_center, @vertices ) = @_; my $self = { vertices => \@vertices, graph => $graph, other_center => $other_center, cache => {} }; return bless $self, $class; } # Accessors # Groups are used to check which halves of a chain can be combined together. # If a graph contains single center, all halves will share the center. sub group() { my( $self ) = @_; return $self->{vertices}[1 - defined $self->{other_center}]; } sub _disconnected_chain_graph() { my( $self ) = @_; return $self->{_disconnected_chain_graph} if $self->{_disconnected_chain_graph}; my $graph = $self->graph->copy; my @vertices = $self->vertices; if( $self->{other_center} ) { # Cut the edge to the other center $graph->delete_edge( $vertices[0], $self->{other_center} ); } else { # Cut the edges to the other candidates for ($graph->neighbours( $vertices[0] )) { $graph->delete_edge( $vertices[0], $_ ); } } $graph->delete_path( @vertices ); $graph->delete_vertex( $self->parent ) if $self->parent; $self->{_disconnected_chain_graph} = $graph; return $graph; } 1; ChemOnomatopist-0.6.1/lib/ChemOnomatopist/Elements.pm000066400000000000000000000240411452012116100226120ustar00rootroot00000000000000package ChemOnomatopist::Elements; use strict; use warnings; # ABSTRACT: Element properties from IUPAC Blue Book # VERSION use parent Exporter::; our @EXPORT_OK = qw( %elements ); # Replacement prefixes and seniorities are taken from BBv2 Appendix 1 our %elements = ( F => { prefix => 'fluora', seniority => 0, }, Cl => { prefix => 'chlora', seniority => 1, }, Br => { prefix => 'broma', seniority => 2, }, I => { prefix => 'ioda', seniority => 3, }, At => { prefix => 'astata', seniority => 4, }, O => { prefix => 'oxa', seniority => 5, }, S => { prefix => 'thia', seniority => 6, }, Se => { prefix => 'selena', seniority => 7, }, Te => { prefix => 'tellura', seniority => 8, }, Po => { prefix => 'polona', seniority => 9, }, Lv => { prefix => 'livermora', seniority => 10, }, N => { prefix => 'aza', seniority => 11, }, P => { prefix => 'phospha', seniority => 12, }, As => { prefix => 'arsa', seniority => 13, }, Sb => { prefix => 'stiba', seniority => 14, }, Bi => { prefix => 'bisma', seniority => 15, }, C => { prefix => 'carba', seniority => 16, }, Si => { prefix => 'sila', seniority => 17, }, Ge => { prefix => 'germa', seniority => 18, }, Sn => { prefix => 'stanna', seniority => 19, }, Pb => { prefix => 'plumba', seniority => 20, }, Fl => { prefix => 'flerova', seniority => 21, }, B => { prefix => 'bora', seniority => 22, }, Al => { prefix => 'alumina', seniority => 23, }, Ga => { prefix => 'galla', seniority => 24, }, In => { prefix => 'inda', seniority => 25, }, Tl => { prefix => 'thalla', seniority => 26, }, Zn => { prefix => 'zinca', seniority => 27, }, Cd => { prefix => 'cadma', seniority => 28, }, Hg => { prefix => 'mercura', seniority => 29, }, Cn => { prefix => 'copernica', seniority => 30, }, Cu => { prefix => 'cupra', seniority => 31, }, Ag => { prefix => 'argenta', seniority => 32, }, Au => { prefix => 'aura', seniority => 33, }, Rg => { prefix => 'roentgena', seniority => 34, }, Ni => { prefix => 'nickela', seniority => 35, }, Pd => { prefix => 'pallada', seniority => 36, }, Pt => { prefix => 'platina', seniority => 37, }, Ds => { prefix => 'darmstadta', seniority => 38, }, Co => { prefix => 'cobalta', seniority => 39, }, Rh => { prefix => 'rhoda', seniority => 40, }, Ir => { prefix => 'irida', seniority => 41, }, Mt => { prefix => 'meitnera', seniority => 42, }, Fe => { prefix => 'ferra', seniority => 43, }, Ru => { prefix => 'ruthena', seniority => 44, }, Os => { prefix => 'osma', seniority => 45, }, Hs => { prefix => 'hassa', seniority => 46, }, Mn => { prefix => 'mangana', seniority => 47, }, Tc => { prefix => 'techneta', seniority => 48, }, Re => { prefix => 'rhena', seniority => 49, }, Bh => { prefix => 'bohra', seniority => 50, }, Cr => { prefix => 'chroma', seniority => 51, }, Mo => { prefix => 'molybda', seniority => 52, }, W => { prefix => 'tungsta', seniority => 53, }, Sg => { prefix => 'seaborga', seniority => 54, }, V => { prefix => 'vanada', seniority => 55, }, Nb => { prefix => 'nioba', seniority => 56, }, Ta => { prefix => 'tantala', seniority => 57, }, Db => { prefix => 'dubna', seniority => 58, }, Ti => { prefix => 'titana', seniority => 59, }, Zr => { prefix => 'zircona', seniority => 60, }, Hf => { prefix => 'hafna', seniority => 61, }, Rf => { prefix => 'rutherforda', seniority => 62, }, Sc => { prefix => 'scanda', seniority => 63, }, Y => { prefix => 'yttra', seniority => 64, }, La => { prefix => 'lanthana', seniority => 65, }, Ce => { prefix => 'cera', seniority => 66, }, Pr => { prefix => 'praseodyma', seniority => 67, }, Nd => { prefix => 'neodyma', seniority => 68, }, Pm => { prefix => 'prometha', seniority => 69, }, Sm => { prefix => 'samara', seniority => 70, }, Eu => { prefix => 'europa', seniority => 71, }, Gd => { prefix => 'gadolina', seniority => 72, }, Tb => { prefix => 'terba', seniority => 73, }, Dy => { prefix => 'dysprosa', seniority => 74, }, Ho => { prefix => 'holma', seniority => 75, }, Er => { prefix => 'erba', seniority => 76, }, Tm => { prefix => 'thula', seniority => 77, }, Yb => { prefix => 'ytterba', seniority => 78, }, Lu => { prefix => 'luteta', seniority => 79, }, Ac => { prefix => 'actina', seniority => 80, }, Th => { prefix => 'thora', seniority => 81, }, Pa => { prefix => 'protactina', seniority => 82, }, U => { prefix => 'urana', seniority => 83, }, Np => { prefix => 'neptuna', seniority => 84, }, Pu => { prefix => 'plutona', seniority => 85, }, Am => { prefix => 'america', seniority => 86, }, Cm => { prefix => 'cura', seniority => 87, }, Bk => { prefix => 'berkela', seniority => 88, }, Cf => { prefix => 'californa', seniority => 89, }, Es => { prefix => 'einsteina', seniority => 90, }, Fm => { prefix => 'ferma', seniority => 91, }, Md => { prefix => 'mendeleva', seniority => 92, }, No => { prefix => 'nobela', seniority => 93, }, Lr => { prefix => 'lawrenca', seniority => 94, }, Be => { prefix => 'berylla', seniority => 95, }, Mg => { prefix => 'magnesa', seniority => 96, }, Ca => { prefix => 'calca', seniority => 97, }, Sr => { prefix => 'stronta', seniority => 98, }, Ba => { prefix => 'bara', seniority => 99, }, Ra => { prefix => 'rada', seniority => 100, }, Li => { prefix => 'litha', seniority => 101, }, Na => { prefix => 'soda', seniority => 102, }, K => { prefix => 'potassa', seniority => 103, }, Rb => { prefix => 'rubida', seniority => 104, }, Cs => { prefix => 'caesa', seniority => 105, }, Fr => { prefix => 'franca', seniority => 106, }, He => { prefix => 'hela', seniority => 107, }, Ne => { prefix => 'neona', seniority => 108, }, Ar => { prefix => 'argona', seniority => 109, }, Kr => { prefix => 'kryptona', seniority => 110, }, Xe => { prefix => 'xenona', seniority => 111, }, Rn => { prefix => 'radona', seniority => 112, }, ); # Hantzsch-Widman system prefixes, where different, taken from BBv2 P-22.2.2.1.1, Table 2.4 my %old_elements = ( B => { standard_bonding_number => 3, }, C => { standard_bonding_number => 4, }, N => { standard_bonding_number => 3, }, O => { standard_bonding_number => 2, }, F => { standard_bonding_number => 1, }, Al => { HantzschWidman => 'aluma', standard_bonding_number => 3, }, Si => { standard_bonding_number => 4, }, P => { standard_bonding_number => 3, }, S => { standard_bonding_number => 2, }, Cl => { standard_bonding_number => 1, }, Ga => { standard_bonding_number => 3, }, Ge => { standard_bonding_number => 4, }, As => { standard_bonding_number => 3, }, Se => { standard_bonding_number => 2, }, Br => { standard_bonding_number => 1, }, In => { HantzschWidman => 'indiga', standard_bonding_number => 3, }, Sn => { standard_bonding_number => 4, }, Sb => { standard_bonding_number => 3, }, Te => { standard_bonding_number => 2, }, I => { standard_bonding_number => 1, }, Tl => { standard_bonding_number => 3, }, Pb => { standard_bonding_number => 4, }, Bi => { standard_bonding_number => 3, }, Po => { standard_bonding_number => 2, }, At => { standard_bonding_number => 1, }, ); for my $element (keys %old_elements) { for my $key (qw( HantzschWidman standard_bonding_number )) { next unless exists $old_elements{$element}->{$key}; $elements{$element}->{$key} = $old_elements{$element}->{$key}; } } 1; ChemOnomatopist-0.6.1/lib/ChemOnomatopist/Group.pm000066400000000000000000000123421452012116100221330ustar00rootroot00000000000000package ChemOnomatopist::Group; use strict; use warnings; use ChemOnomatopist::Chain::Circular; use ChemOnomatopist::Group::AcylHalide; use ChemOnomatopist::Group::Aldehyde; use ChemOnomatopist::Group::Amide; use ChemOnomatopist::Group::Amine; use ChemOnomatopist::Group::Carboxyl; use ChemOnomatopist::Group::Cyanide; use ChemOnomatopist::Group::Ester; use ChemOnomatopist::Group::Ether; use ChemOnomatopist::Group::Guanidine; use ChemOnomatopist::Group::Hydrazide; use ChemOnomatopist::Group::Hydrazine; use ChemOnomatopist::Group::Hydroperoxide; use ChemOnomatopist::Group::Hydroxy; use ChemOnomatopist::Group::Imino; use ChemOnomatopist::Group::Ketone; use ChemOnomatopist::Group::SulfinicAcid; use ChemOnomatopist::Group::SulfonicAcid; use List::Util qw( any ); use Scalar::Util qw( blessed ); # ABSTRACT: Chemical group # VERSION # Order from BBv2 P-41 our @order = ( # Radicals # Radical anions # Radical cations # Anions # Zwitterions # Cations # Acids (BBv2 P-42) ChemOnomatopist::Group::Carboxyl::, ChemOnomatopist::Group::SulfonicAcid::, ChemOnomatopist::Group::SulfinicAcid::, ChemOnomatopist::Group::AcylHalide::, # FIXME: Is this correct? # Anhydrides ChemOnomatopist::Group::Ester::, # Acid halides and pseudohalides # Amides ChemOnomatopist::Group::Amide::, ChemOnomatopist::Group::Guanidine::, # Hydrazides ChemOnomatopist::Group::Hydrazide::, # Imides # 14. Nitriles ChemOnomatopist::Group::Cyanide::, ChemOnomatopist::Group::Aldehyde::, ChemOnomatopist::Group::Ketone::, ChemOnomatopist::Group::Hydroxy::, ChemOnomatopist::Group::Hydroperoxide::, ChemOnomatopist::Group::Amine::, ChemOnomatopist::Group::Imino::, # TODO: Some are omitted # TODO: Classes denoted by the senior atom in heterane nomenclature should go here # 21. Nitrogen compounds ChemOnomatopist::Group::Hydrazine::, # 41. Ethers, then sulfides, sulfoxides, sulfones; then selenides, selenoxides, etc. ChemOnomatopist::Group::Ether::, ); sub new { my( $class, $element ) = @_; return bless { element => $element }, $class; } sub element() { return $_[0]->{element} } sub is_carbon() { my( $self ) = @_; return $self->element && $self->element eq 'C'; } sub is_nitrogen() { my( $self ) = @_; return $self->element && $self->element eq 'N'; } sub is_oxygen() { my( $self ) = @_; return $self->element && $self->element eq 'O'; } sub is_part_of_chain() { return '' } # Certain groups can only be expressed as prefixes sub is_prefix_only() { return '' } # Certain groups can only be terminal in chains sub is_terminal() { return '' } sub needs_heteroatom_locants { return 1 } sub needs_heteroatom_names { return 1 } sub needs_multiple_bond_suffix { return 1 } sub prefix() { return '' } sub suffix() { return $_[0]->is_prefix_only ? undef : '' } sub multisuffix() { return $_[0]->suffix } sub suffix_if_cycle_substituent() { return $_[0]->suffix } sub candidate_for() { my( $self ) = @_; return undef unless exists $self->{candidate_for}; return $self->{candidate_for}; } sub rule_greatest_number_of_most_senior_heteroatoms { my( @chains ) = @_; # This order is taken from BBv2 P-41 and is different from order in %elements my @element_order = qw( N P As Sb Bi Si Ge Sn Pb B Al Ga In Tl O S Se Te ); my %element_order = map { $element_order[$_] => $_ } 0..$#element_order; my( $most_senior ) = sort { $element_order{$a} <=> $element_order{$b} } grep { exists $element_order{$_} } map { $_->heteroatoms } @chains; return @chains unless $most_senior; my( $max_value ) = reverse sort map { scalar( grep { $_ eq $most_senior } $_->heteroatoms ) } @chains; return grep { scalar( grep { $_ eq $most_senior } $_->heteroatoms ) == $max_value } @chains; } # Compare seniority of two objects sub cmp { my( $A, $B ) = @_; my( $A_pos ) = grep { $A->isa( $order[$_] ) } 0..$#order; my( $B_pos ) = grep { $B->isa( $order[$_] ) } 0..$#order; # Clear distinction exists if( defined $A_pos && defined $B_pos && $A_pos <=> $B_pos ) { return $A_pos <=> $B_pos; } # Any of the objects is in the priority list if( defined $A_pos ^ defined $B_pos ) { return defined $B_pos <=> defined $A_pos; } # Same class; class should know how to compare if( blessed $A eq blessed $B ) { return $A->_cmp_instances( $B ); } # BBv2 P-41 # First, the chain with the most senior atom wins # FIXME: Select just by seniority, not by number my @chains = rule_greatest_number_of_most_senior_heteroatoms( $A, $B ); return ($chains[0] == $B) * 2 - 1 if @chains; # Second, the order is heterocycles, polyheteroatom, heteroatom if( $A->isa( ChemOnomatopist::Chain::Circular:: ) + 0 ^ $B->isa( ChemOnomatopist::Chain::Circular:: ) + 0 ) { return $B->isa( ChemOnomatopist::Chain::Circular:: ) <=> $A->isa( ChemOnomatopist::Chain::Circular:: ); } # TODO: The remaining rules from P-41 die "cannot compare\n"; } # Two instances of the same group are thought to be of the same seniority sub _cmp_instances { return 0 } 1; ChemOnomatopist-0.6.1/lib/ChemOnomatopist/Group/000077500000000000000000000000001452012116100215735ustar00rootroot00000000000000ChemOnomatopist-0.6.1/lib/ChemOnomatopist/Group/AcylHalide.pm000066400000000000000000000013431452012116100241310ustar00rootroot00000000000000package ChemOnomatopist::Group::AcylHalide; use strict; use warnings; # ABSTRACT: Acyl halide group # VERSION use ChemOnomatopist::Elements qw( %elements ); use parent ChemOnomatopist::Group::; sub new { my( $class, $halide ) = @_; return bless { halide => $halide }, $class; } sub element() { return 'C' } sub prefix() { my( $self ) = @_; my $name = 'carbono' . $elements{$self->{halide}{symbol}}->{prefix}; $name =~ s/a$/idoyl/; return $name; } sub suffix() { my( $self ) = @_; my $name = 'oyl ' . $elements{$self->{halide}{symbol}}->{prefix}; $name =~ s/a$/ide/; return $name; } sub _cmp_instances { my( $A, $B ) = @_; return $A->{halide}{symbol} cmp $B->{halide}{symbol}; } 1; ChemOnomatopist-0.6.1/lib/ChemOnomatopist/Group/Aldehyde.pm000066400000000000000000000010631452012116100236500ustar00rootroot00000000000000package ChemOnomatopist::Group::Aldehyde; use strict; use warnings; # ABSTRACT: Aldehyde group # VERSION use parent ChemOnomatopist::Group::; sub new { my( $class, $ketone ) = @_; return bless { ketone => $ketone }, $class; } sub element { return 'C' } sub is_part_of_chain() { return 1 } sub prefix { return 'formyl' } sub suffix() { my( $self ) = @_; my $name = $self->{ketone}->suffix; $name =~ s/one$/al/; return $name; } sub multisuffix { return 'carbaldehyde' } sub suffix_if_cycle_substituent { return 'carbaldehyde' } 1; ChemOnomatopist-0.6.1/lib/ChemOnomatopist/Group/Amide.pm000066400000000000000000000005471452012116100231560ustar00rootroot00000000000000package ChemOnomatopist::Group::Amide; use strict; use warnings; # ABSTRACT: Amide group # VERSION use parent ChemOnomatopist::Group::; sub new { my( $class, $parent ) = @_; return bless { parent => $parent }, $class; } sub element() { return 'N' } sub is_terminal() { return 1 } sub prefix { return 'amido' } sub suffix { return 'amide' } 1; ChemOnomatopist-0.6.1/lib/ChemOnomatopist/Group/Amine.pm000066400000000000000000000004121452012116100231570ustar00rootroot00000000000000package ChemOnomatopist::Group::Amine; use strict; use warnings; # ABSTRACT: Amino group # VERSION use parent ChemOnomatopist::Group::; sub element() { return 'N' } sub is_terminal() { return 1 } sub prefix { return 'amino' } sub suffix { return 'amine' } 1; ChemOnomatopist-0.6.1/lib/ChemOnomatopist/Group/Carboxyl.pm000066400000000000000000000006431452012116100237170ustar00rootroot00000000000000package ChemOnomatopist::Group::Carboxyl; use strict; use warnings; # ABSTRACT: Carboxyl group # VERSION use parent ChemOnomatopist::Group::; sub element() { return 'C' } sub prefix() { return 'carboxy' } sub suffix() { return 'oic acid' } # FIXME: Should be 'carboxylic acid' if attached to cycles sub multisuffix() { return 'carboxylic acid' } sub suffix_if_cycle_substituent() { return $_[0]->multisuffix } 1; ChemOnomatopist-0.6.1/lib/ChemOnomatopist/Group/Cyanide.pm000066400000000000000000000004571452012116100235130ustar00rootroot00000000000000package ChemOnomatopist::Group::Cyanide; use strict; use warnings; # ABSTRACT: Cyanide group # VERSION use parent ChemOnomatopist::Group::; # FIXME: When part of chain, suffix is 'nitrile'; when an attachment - 'carbonitrile'. sub prefix { return 'cyano' } sub suffix { return 'carbonitrile' } 1; ChemOnomatopist-0.6.1/lib/ChemOnomatopist/Group/Ester.pm000066400000000000000000000016571452012116100232240ustar00rootroot00000000000000package ChemOnomatopist::Group::Ester; use strict; use warnings; # ABSTRACT: Ester group # VERSION use parent ChemOnomatopist::Group::; use ChemOnomatopist; use ChemOnomatopist::Util qw( copy ); use Scalar::Util qw( blessed ); sub new { my( $class, $hydroxylic, $acid ) = @_; return bless { hydroxylic => $hydroxylic, acid => $acid }, $class; } sub element { return 'C' } sub name { my( $class, $graph ) = @_; # TODO: Assume monoester my( $ester ) = grep { blessed $_ && $_->isa( ChemOnomatopist::Group::Ester:: ) } $graph->vertices; $graph = copy $graph; $graph->delete_edge( $ester, $ester->{hydroxylic} ); my $hydroxylic_part = ChemOnomatopist::get_sidechain_name( $graph, undef, $ester->{hydroxylic} ); my $acid_part = ChemOnomatopist::get_sidechain_name( $graph, undef, $ester ); $acid_part =~ s/yl$/anoate/; return "$hydroxylic_part $acid_part"; } 1; ChemOnomatopist-0.6.1/lib/ChemOnomatopist/Group/Ether.pm000066400000000000000000000004141452012116100231770ustar00rootroot00000000000000package ChemOnomatopist::Group::Ether; use strict; use warnings; # ABSTRACT: Ether group # VERSION use parent ChemOnomatopist::Group::; sub element() { return 'O' } sub is_part_of_chain() { return 1 } sub prefix() { return 'oxy' } sub suffix() { return '' } 1; ChemOnomatopist-0.6.1/lib/ChemOnomatopist/Group/Guanidine.pm000066400000000000000000000037211452012116100240370ustar00rootroot00000000000000package ChemOnomatopist::Group::Guanidine; use strict; use warnings; # ABSTRACT: Guanidine group # VERSION use parent ChemOnomatopist::Group::, ChemOnomatopist::Chain::; use Algorithm::Combinatorics qw( permutations ); use Chemistry::OpenSMILES qw( is_double_bond ); sub new { my( $class, $graph, $atom ) = @_; # Double ! is used here as is_double_bond() returns 1 or undef which is a bug in Chemistry::OpenSMILES? my @vertices = sort { !!is_double_bond( $graph, $atom, $a ) <=> !!is_double_bond( $graph, $atom, $b ) } $graph->neighbours( $atom ); my @orders = map { !!is_double_bond( $graph, $atom, $_ ) } @vertices; return bless { graph => $graph, vertices => \@vertices, is_double_bond => \@orders }, $class; } sub candidates() { my( $self ) = @_; my @candidates; if( $self->{is_double_bond}[2] ) { @candidates = ( $self, $self->copy ); $candidates[1]->{vertices} = [ map { $self->{vertices}[$_] } ( 1, 0, 2 ) ]; $candidates[1]->{candidate_for} = $self; } else { for (permutations([0, 1, 2])) { push @candidates, $self->copy; $candidates[-1]->{vertices} = [ map { $self->{vertices}[$_] } @$_ ]; $candidates[-1]->{candidate_for} = $self; } } return @candidates; } sub copy() { my( $self ) = @_; return bless { %$self }; } sub needs_heteroatom_locants() { return '' } sub needs_heteroatom_names() { return '' } sub needs_substituent_locants() { return 1 } # FIXME: There may be identical substituents, what to do then? sub locants(@) { my $self = shift; return map { 'N' . "'" x $_ } @_; } # Two kinds exist per BBv2 P-66.4.1.2.1.3 sub prefix { my( $self ) = @_; if( $self->{is_double_bond}[2] && $self->graph->degree( $self->{vertices}[2] ) > 2 ) { return '[(diaminomethylidene)amino]'; } else { return '(carbamimidoylamino)'; } } sub suffix { return 'guanidine' } 1; ChemOnomatopist-0.6.1/lib/ChemOnomatopist/Group/Hydrazide.pm000066400000000000000000000014071452012116100240560ustar00rootroot00000000000000package ChemOnomatopist::Group::Hydrazide; use strict; use warnings; # ABSTRACT: Hydrazide group # VERSION use parent ChemOnomatopist::Group::, ChemOnomatopist::Chain::; sub new { my( $class, $graph, @vertices ) = @_; return bless { graph => $graph, vertices => \@vertices }, $class; } sub needs_heteroatom_locants() { return '' } sub needs_heteroatom_names() { return '' } sub needs_substituent_locants() { my( $self ) = @_; return $self->number_of_branches > 1 && $self->number_of_branches < $self->max_valence; } sub locants(@) { my $self = shift; return map { $_ == 0 ? "N'" : $_ == 1 ? 'N' : $_ - 1 } @_; } sub prefix() { my( $self ) = @_; return 'hydrazidyl'; } sub suffix() { my( $self ) = @_; return 'hydrazide'; } 1; ChemOnomatopist-0.6.1/lib/ChemOnomatopist/Group/Hydrazine.pm000066400000000000000000000015661452012116100240760ustar00rootroot00000000000000package ChemOnomatopist::Group::Hydrazine; use strict; use warnings; # ABSTRACT: Hydrazine group # VERSION use parent ChemOnomatopist::Group::, ChemOnomatopist::Chain::; sub new { my( $class, $graph, @vertices ) = @_; return bless { graph => $graph, vertices => \@vertices }, $class; } sub candidates() { my( $self ) = @_; my @chains = ( $self, ChemOnomatopist::Group::Hydrazine->new( $self->graph, reverse $self->vertices ) ); $chains[1]->{candidate_for} = $self; return @chains; } sub needs_heteroatom_locants() { return '' } sub needs_heteroatom_names() { return '' } sub needs_substituent_locants() { my( $self ) = @_; return $self->number_of_branches > 1 && $self->number_of_branches < $self->max_valence; } sub prefix() { my( $self ) = @_; return 'hydrazinyl'; } sub suffix() { my( $self ) = @_; return 'hydrazine'; } 1; ChemOnomatopist-0.6.1/lib/ChemOnomatopist/Group/Hydroperoxide.pm000066400000000000000000000027261452012116100247650ustar00rootroot00000000000000package ChemOnomatopist::Group::Hydroperoxide; use strict; use warnings; # ABSTRACT: Hydroperoxide group # VERSION use ChemOnomatopist::Elements qw( %elements ); use List::Util qw( all ); use parent ChemOnomatopist::Group::; sub new { my( $class, @atoms ) = @_; return bless { atoms => \@atoms }, $class; } sub element() { return $_[0]->{atoms}[0]{symbol} } sub prefix { my( $self ) = @_; my @elements = map { ChemOnomatopist::element( $_ ) } @{$self->{atoms}}; return 'hydroperoxy' if all { $_ eq 'O' } @elements; return 'disulfanyl' if all { $_ eq 'S' } @elements; my $name = ''; for my $element (reverse @elements) { # FIXME: Incomplete if( $element eq 'O' ) { $name .= 'hydr' unless $name; $name .= 'oxy'; } $name .= 'sulfanyl' if $element eq 'S'; $name .= 'selanyl' if $element eq 'Se'; $name .= 'tellanyl' if $element eq 'Te'; } return $name; } sub suffix { my( $self ) = @_; my @elements = map { ChemOnomatopist::element( $_ ) } @{$self->{atoms}}; return 'peroxol' if all { $_ eq 'O' } @elements; my $name; if( $elements[0] eq $elements[1] ) { $name = 'di' . $elements{$elements[0]}->{prefix}; $name =~ s/a$/o/; } else { $name = '-' . join( '', @elements ) . '-' . join '', sort map { s/a$/o/; $_ } map { $elements{$_}->{prefix} } grep { $_ ne 'O' } @elements; } return $name . 'peroxol'; } 1; ChemOnomatopist-0.6.1/lib/ChemOnomatopist/Group/Hydroxy.pm000066400000000000000000000010751452012116100236020ustar00rootroot00000000000000package ChemOnomatopist::Group::Hydroxy; use strict; use warnings; # ABSTRACT: Hydroxy group # VERSION use parent ChemOnomatopist::Group::; # From BBv2 P-63.1.5 my %prefixes = ( O => 'hydroxy', S => 'sulfanyl', Se => 'selanyl', Te => 'tellanyl' ); my %suffixes = ( O => 'ol', S => 'thiol', Se => 'selenol', Te => 'tellurol' ); sub prefix { my( $self ) = @_; return $prefixes{$self->element}; } sub suffix { my( $self ) = @_; return $suffixes{$self->element}; } sub _cmp_instances { my( $A, $B ) = @_; return $A->element cmp $B->element } 1; ChemOnomatopist-0.6.1/lib/ChemOnomatopist/Group/Imino.pm000066400000000000000000000004701452012116100232050ustar00rootroot00000000000000package ChemOnomatopist::Group::Imino; use strict; use warnings; # ABSTRACT: Imino group # VERSION use parent ChemOnomatopist::Group::; sub element() { return 'N' } sub is_terminal() { return 1 } sub needs_multiple_bond_suffix { return '' } sub prefix { return 'imino' } sub suffix { return 'imine' } 1; ChemOnomatopist-0.6.1/lib/ChemOnomatopist/Group/Ketone.pm000066400000000000000000000011651452012116100233610ustar00rootroot00000000000000package ChemOnomatopist::Group::Ketone; use strict; use warnings; # ABSTRACT: Ketone group # VERSION use parent ChemOnomatopist::Group::; # From BBv2 P-64.6.1 my %prefixes = ( O => 'oxo', S => 'sulfanylidene', Se => 'selanylidene', Te => 'tellanylidene' ); my %suffixes = ( O => 'one', S => 'thione', Se => 'selone', Te => 'tellone' ); sub needs_multiple_bond_suffix { return '' } sub prefix { my( $self ) = @_; return $prefixes{$self->element}; } sub suffix { my( $self ) = @_; return $suffixes{$self->element}; } sub _cmp_instances { my( $A, $B ) = @_; return $A->element cmp $B->element; } 1; ChemOnomatopist-0.6.1/lib/ChemOnomatopist/Group/Nitro.pm000066400000000000000000000003211452012116100232200ustar00rootroot00000000000000package ChemOnomatopist::Group::Nitro; use strict; use warnings; # ABSTRACT: Nitro group # VERSION use parent ChemOnomatopist::Group::; sub prefix { return 'nitro' } sub is_prefix_only() { return 1 } 1; ChemOnomatopist-0.6.1/lib/ChemOnomatopist/Group/Nitroso.pm000066400000000000000000000007301452012116100235660ustar00rootroot00000000000000package ChemOnomatopist::Group::Nitroso; use strict; use warnings; # ABSTRACT: Nitroso group or its analogue # VERSION use parent ChemOnomatopist::Group::; use ChemOnomatopist::Elements qw( %elements ); sub is_prefix_only() { return 1 } # Compiled from BBv2 Table 5.1 (P-59.1.9) sub prefix { my( $self ) = @_; return 'nitroso' if $self->element eq 'N'; my $prefix = $elements{$self->element}{prefix}; $prefix =~ s/a$/osyl/; return $prefix; } 1; ChemOnomatopist-0.6.1/lib/ChemOnomatopist/Group/SulfinicAcid.pm000066400000000000000000000004341452012116100244670ustar00rootroot00000000000000package ChemOnomatopist::Group::SulfinicAcid; use strict; use warnings; # ABSTRACT: Sulfinic acid group # VERSION use parent ChemOnomatopist::Group::; sub element() { return 'S' } # From BBv2 P-65.3.0 sub prefix() { return 'sulfino' } sub suffix() { return 'sulfinic acid' } 1; ChemOnomatopist-0.6.1/lib/ChemOnomatopist/Group/Sulfinyl.pm000066400000000000000000000005031452012116100237340ustar00rootroot00000000000000package ChemOnomatopist::Group::Sulfinyl; use strict; use warnings; # ABSTRACT: Sulfinyl group # VERSION use parent ChemOnomatopist::Group::; my %prefixes = ( S => 'sulfinyl', Se => 'seleninyl', Te => 'tellurinyl', ); sub prefix { return $prefixes{$_[0]->element} } sub is_prefix_only() { return 1 } 1; ChemOnomatopist-0.6.1/lib/ChemOnomatopist/Group/SulfonicAcid.pm000066400000000000000000000004321452012116100244730ustar00rootroot00000000000000package ChemOnomatopist::Group::SulfonicAcid; use strict; use warnings; # ABSTRACT: Sulfonic acid group # VERSION use parent ChemOnomatopist::Group::; sub element() { return 'S' } # From BBv2 P-65.3.0 sub prefix() { return 'sulfo' } sub suffix() { return 'sulfonic acid' } 1; ChemOnomatopist-0.6.1/lib/ChemOnomatopist/Group/Sulfonyl.pm000066400000000000000000000005031452012116100237420ustar00rootroot00000000000000package ChemOnomatopist::Group::Sulfonyl; use strict; use warnings; # ABSTRACT: Sulfonyl group # VERSION use parent ChemOnomatopist::Group::; my %prefixes = ( S => 'sulfonyl', Se => 'selenonyl', Te => 'telluronyl', ); sub prefix { return $prefixes{$_[0]->element} } sub is_prefix_only() { return 1 } 1; ChemOnomatopist-0.6.1/lib/ChemOnomatopist/Group/XO3.pm000066400000000000000000000006271452012116100225470ustar00rootroot00000000000000package ChemOnomatopist::Group::XO3; use strict; use warnings; # ABSTRACT: XO3 group # VERSION use parent ChemOnomatopist::Group::; use ChemOnomatopist::Elements qw( %elements ); sub is_prefix_only() { return 1 } # Compiled from BBv2 Table 5.1 (P-59.1.9) sub prefix { my( $self ) = @_; my $prefix = 'per' . $elements{$self->element}{prefix}; $prefix =~ s/a$/yl/; return $prefix; } 1; ChemOnomatopist-0.6.1/lib/ChemOnomatopist/MolecularGraph.pm000066400000000000000000000037551452012116100237540ustar00rootroot00000000000000package ChemOnomatopist::MolecularGraph; use strict; use warnings; # ABSTRACT: Graph extension for molecular graphs # VERSION use parent Graph::Undirected::; use ChemOnomatopist::Util::Graph; use List::Util qw( all ); use Set::Object qw( set ); sub new { my $class = shift; if( ref $class ) { return bless $class->SUPER::new( refvertexed => 1 ); } elsif( @_ == 1 && $_[0]->isa( Graph::Undirected:: ) ) { return bless $_[0], $class; } else { return bless Graph::Undirected->new( @_, refvertexed => 1 ), $class; } } # Graph::copy() does not copy edge attributes sub copy() { my( $self ) = @_; my $copy = $self->SUPER::copy; for my $edge ($self->edges) { next unless $self->has_edge_attributes( @$edge ); $copy->set_edge_attributes( @$edge, $self->get_edge_attributes( @$edge ) ); } $copy->set_graph_attributes( $self->get_graph_attributes ); return bless $copy; } sub subgraph() { my( $self, @vertices ) = @_; if( all { ref $_ eq 'ARRAY' } @vertices ) { return bless $self->SUPER::subgraph( @vertices ); } else { return ChemOnomatopist::Util::Graph::subgraph( $self, @vertices ); } } sub add_group($) { my( $self, $group ) = @_; $self->set_graph_attribute( 'groups', [] ) unless $self->has_graph_attribute( 'groups' ); push @{$self->get_graph_attribute( 'groups' )}, $group; } sub delete_group($) { my( $self, $group ) = @_; return unless $self->has_graph_attribute( 'groups' ); @{$self->get_graph_attribute( 'groups' )} = grep { $_ != $group } @{$self->get_graph_attribute( 'groups' )}; } sub groups(@) { my( $self, @vertices ) = @_; return () unless $self->has_graph_attribute( 'groups' ); if( @vertices ) { my $vertices = set( @vertices ); return grep { $vertices <= set( $_->vertices ) } @{$self->get_graph_attribute( 'groups' )}; } else { return @{$self->get_graph_attribute( 'groups' )}; } } 1; ChemOnomatopist-0.6.1/lib/ChemOnomatopist/Name.pm000066400000000000000000000146721452012116100217270ustar00rootroot00000000000000package ChemOnomatopist::Name; use strict; use warnings; # ABSTRACT: Chemical name # VERSION use overload '.=' => \&append; use overload '""' => sub { return join '', @{$_[0]->{name}} }; use overload 'eq' => sub { return "$_[0]" eq "$_[1]" }; use overload 'cmp' => sub { return ("$_[0]" cmp "$_[1]") * ($_[2] ? -1 : 1) }; use overload '@{}' => sub { return $_[0]->{name} }; use ChemOnomatopist::Name::Part::AlkaneANSuffix; use ChemOnomatopist::Name::Part::Element; use ChemOnomatopist::Name::Part::Fusion; use ChemOnomatopist::Name::Part::Locants; use ChemOnomatopist::Name::Part::Locants::Substituent; use ChemOnomatopist::Name::Part::Multiplier; use ChemOnomatopist::Name::Part::Stem; use Clone qw( clone ); use List::Util qw( any ); use Scalar::Util qw( blessed ); sub new { my( $class, $name ) = @_; my @name_parts; if( defined $name && blessed $name ) { push @name_parts, $name; } elsif( defined $name && $name ne '' ) { push @name_parts, $name; } return bless { name => \@name_parts }, $class; } # TODO: Implement vowel elision as written in BBv2 P-16.7 sub append($) { my( $self, $name ) = @_; $self->[-1] =~ s/a$// if $name =~ /^a/ && @$self; $self->[-1] =~ s/o$// if $name =~ /^o/ && @$self && $self->[-1] ne 'cyclo'; # BBv2 P-16.7.1 (d) my $terminal_element; if( @$self && blessed $self->[-1] && $self->[-1]->isa( ChemOnomatopist::Name::Part::Element:: ) && "$name" =~ /^[aeiouy]/ ) { $self->[-1] =~ s/a$//; } # If names are combined and the second one starts with a number, a separator is added. if( @$self && blessed $name && $name->isa( ChemOnomatopist::Name:: ) && $name =~ /^\d/ ) { push @$self, '-'; } # FIXME: The following needlessly converts $name into string $name =~ s/^-// if @$self && $self->[-1] =~ /-$/ && $name =~ /^-/; push @$self, blessed $name && $name->isa( ChemOnomatopist::Name:: ) ? @$name : $name; return $self; } sub append_element($) { my( $self, $element ) = @_; return $self->append( ChemOnomatopist::Name::Part::Element->new( $element ) ); } sub append_locants { my( $self, @locants ) = @_; return $self unless @locants; if( @$self ) { $self->append( ChemOnomatopist::Name::Part::Locants->new( '-' . join( ',', @locants ) . '-' ) ); } else { $self->append( ChemOnomatopist::Name::Part::Locants->new( join( ',', @locants ) . '-' ) ); } return $self; } sub append_multiplier($) { my( $self, $string ) = @_; return $self if $string eq ''; return $self->append( ChemOnomatopist::Name::Part::Multiplier->new( $string ) ); } sub append_stem($) { my( $self, $stem ) = @_; return $self->append( ChemOnomatopist::Name::Part::Stem->new( $stem ) ); } sub append_substituent_locant($) { my( $self, $locant ) = @_; $self->append( ChemOnomatopist::Name::Part::Locants::Substituent->new( '-' . $locant . '-' ) ); $self->{name} = [ 'tert-but' ] if $self eq '2-methylpropan-2-'; return $self; } sub append_suffix($) { my( $self, $suffix ) = @_; if( @$self ) { if( $suffix =~ /^[aeiouy]/ ) { $self->[-2] =~ s/e$// if $self =~ /e-[0-9,]+-$/; # BBv2 P-16.7.1 (a) $self->[-1] =~ s/e$// if $self =~ /e$/; } $self->[-1] =~ s/a$// if $self->ends_with_multiplier && $suffix =~ /^[ao]/; # BBv2 P-16.7.1 (b) } return $self->append( $suffix ); } # FIXME: Implement according to BBv2 P-16.5.4: {[({[( )]})]} sub bracket() { my( $self ) = @_; my $name_wo_fusion = clone $self; # BBv2 P-16.5.4.1.2: fusion indicators are ignored @$name_wo_fusion = grep { !blessed $_ || !$_->isa( ChemOnomatopist::Name::Part::Fusion:: ) } @$name_wo_fusion; if( $name_wo_fusion =~ /\{/ ) { unshift @$self, '('; push @$self, ')'; } elsif( $name_wo_fusion =~ /\[/ ) { unshift @$self, '{'; push @$self, '}'; } elsif( $name_wo_fusion =~ /\(/ ) { unshift @$self, '['; push @$self, ']'; } else { unshift @$self, '('; push @$self, ')'; } return $self; } sub bracket_numeric_locants() { my( $self ) = @_; if( $self->starts_with_locant && $self->[0]->is_numeric ) { $self->[0]->{value} = '[' . $self->[0]->{value}; $self->[0]->{value} =~ s/-$/]/; } for my $i (1..$#$self) { next unless blessed $self->[$i]; next unless $self->[$i]->isa( ChemOnomatopist::Name::Part::Locants:: ); $self->[$i-1] = '['; $self->[$i]->{value} =~ s/-$/]/; } } sub has_locant() { my( $self ) = @_; return any { blessed $_ && $_->isa( ChemOnomatopist::Name::Part::Locants:: ) } @$self; } sub has_substituent_locant() { my( $self ) = @_; return any { blessed $_ && $_->isa( ChemOnomatopist::Name::Part::Locants::Substituent:: ) } @$self; } sub is_enclosed() { my( $self ) = @_; return '' unless @$self; return $self->[0] =~ /^[\(\[\{]/ && $self->[-1] =~ /[\)\]\}]$/; } sub is_simple() { my( $self ) = @_; my $nelements = scalar grep { blessed $_ && $_->isa( ChemOnomatopist::Name::Part::Element:: ) } @$self; my $nstems = scalar grep { blessed $_ && $_->isa( ChemOnomatopist::Name::Part::Stem:: ) } @$self; return '' if $nstems >= 2; return '' if $nstems && $nelements; return 1; } # FIXME: Incomplete, untested and unused # 0 if simple, 1 if compound sub level() { my( $self ) = @_; return 0 + ((grep { blessed $_ && $_->isa( ChemOnomatopist::Name::Part::Stem:: ) } @$self) > 1); } sub ends_with_alkane_an_suffix() { my( $self ) = @_; return @$self && blessed $self->[-1] && $self->[-1]->isa( ChemOnomatopist::Name::Part::AlkaneANSuffix:: ); } sub starts_with_locant() { my( $self ) = @_; return @$self && blessed $self->[0] && $self->[0]->isa( ChemOnomatopist::Name::Part::Locants:: ); } sub starts_with_multiplier() { my( $self ) = @_; return @$self && blessed $self->[0] && $self->[0]->isa( ChemOnomatopist::Name::Part::Multiplier:: ); } sub ends_with_multiplier() { my( $self ) = @_; return @$self && blessed $self->[-1] && $self->[-1]->isa( ChemOnomatopist::Name::Part::Multiplier:: ); } sub ends_with_stem() { my( $self ) = @_; return @$self && blessed $self->[-1] && $self->[-1]->isa( ChemOnomatopist::Name::Part::Stem:: ); } 1; ChemOnomatopist-0.6.1/lib/ChemOnomatopist/Name/000077500000000000000000000000001452012116100213575ustar00rootroot00000000000000ChemOnomatopist-0.6.1/lib/ChemOnomatopist/Name/Part.pm000066400000000000000000000007171452012116100226300ustar00rootroot00000000000000package ChemOnomatopist::Name::Part; use strict; use warnings; # ABSTRACT: Semantic part of a chemical name # VERSION use ChemOnomatopist::Name; use overload '""' => sub { return $_[0]->{value} }; use overload 'cmp' => sub { return ("$_[0]" cmp "$_[1]") * ($_[2] ? -1 : 1) }; sub new { my( $class, $value ) = @_; return bless { value => $value }, $class; } sub to_name() { my( $self ) = @_; return ChemOnomatopist::Name->new( $self ); } 1; ChemOnomatopist-0.6.1/lib/ChemOnomatopist/Name/Part/000077500000000000000000000000001452012116100222655ustar00rootroot00000000000000ChemOnomatopist-0.6.1/lib/ChemOnomatopist/Name/Part/AlkaneANSuffix.pm000066400000000000000000000002471452012116100254250ustar00rootroot00000000000000package ChemOnomatopist::Name::Part::AlkaneANSuffix; use strict; use warnings; # ABSTRACT: Alkane AN suffix # VERSION use parent ChemOnomatopist::Name::Part::; 1; ChemOnomatopist-0.6.1/lib/ChemOnomatopist/Name/Part/Element.pm000066400000000000000000000002631452012116100242150ustar00rootroot00000000000000package ChemOnomatopist::Name::Part::Element; use strict; use warnings; # ABSTRACT: Element name inside a chemical name # VERSION use parent ChemOnomatopist::Name::Part::; 1; ChemOnomatopist-0.6.1/lib/ChemOnomatopist/Name/Part/Fusion.pm000066400000000000000000000002571452012116100240720ustar00rootroot00000000000000package ChemOnomatopist::Name::Part::Fusion; use strict; use warnings; # ABSTRACT: Fusion indicator for fused rings # VERSION use parent ChemOnomatopist::Name::Part::; 1; ChemOnomatopist-0.6.1/lib/ChemOnomatopist/Name/Part/Locants.pm000066400000000000000000000003611452012116100242260ustar00rootroot00000000000000package ChemOnomatopist::Name::Part::Locants; use strict; use warnings; # ABSTRACT: Locants of a chemical name # VERSION use parent ChemOnomatopist::Name::Part::; sub is_numeric() { return $_[0]->{value} =~ /^\]?\d+(,\d+)*[-\]]?$/ } 1; ChemOnomatopist-0.6.1/lib/ChemOnomatopist/Name/Part/Locants/000077500000000000000000000000001452012116100236705ustar00rootroot00000000000000ChemOnomatopist-0.6.1/lib/ChemOnomatopist/Name/Part/Locants/Substituent.pm000066400000000000000000000003001452012116100265500ustar00rootroot00000000000000package ChemOnomatopist::Name::Part::Locants::Substituent; use strict; use warnings; # ABSTRACT: Locants of a chemical name # VERSION use parent ChemOnomatopist::Name::Part::Locants::; 1; ChemOnomatopist-0.6.1/lib/ChemOnomatopist/Name/Part/Multiplier.pm000066400000000000000000000002601452012116100247470ustar00rootroot00000000000000package ChemOnomatopist::Name::Part::Multiplier; use strict; use warnings; # ABSTRACT: Multiplier of a chemical name # VERSION use parent ChemOnomatopist::Name::Part::; 1; ChemOnomatopist-0.6.1/lib/ChemOnomatopist/Name/Part/Stem.pm000066400000000000000000000002441452012116100235330ustar00rootroot00000000000000package ChemOnomatopist::Name::Part::Stem; use strict; use warnings; # ABSTRACT: Stem of a chemical name # VERSION use parent ChemOnomatopist::Name::Part::; 1; ChemOnomatopist-0.6.1/lib/ChemOnomatopist/Old.pm000066400000000000000000000672221452012116100215640ustar00rootroot00000000000000package ChemOnomatopist::Old; use strict; use warnings; # ABSTRACT: Give molecule a name # VERSION use parent ChemOnomatopist::; use ChemOnomatopist::Chain; use ChemOnomatopist::MolecularGraph; use ChemOnomatopist::Util::Graph qw( BFS_calculate_chain_length BFS_is_chain_branched ); use Clone qw( clone ); use Graph::Traversal::BFS; use Graph::Undirected; use List::Util qw( all any max min sum0 uniq ); use Scalar::Util qw( blessed ); # There must be a nicer way to handle calls to parent... sub AUTOLOAD { our $AUTOLOAD; my $call = $AUTOLOAD; $call =~ s/.*:://; return if $call eq 'DESTROY'; return ChemOnomatopist->can( $call )->( @_ ); } no warnings 'recursion'; sub get_name { my( $what ) = @_; # Detect the type of the input data my( $graph ); if( blessed $what && $what->isa( Graph::Undirected:: ) ) { $graph = $what; } else { # Assume SMILES string require Chemistry::OpenSMILES::Parser; my $parser = Chemistry::OpenSMILES::Parser->new; ( $graph ) = map { ChemOnomatopist::MolecularGraph->new( $_ ) } $parser->parse( $what ); # Taking only the first graph } die "nothing supplied for get_name()\n" unless $graph; # Check if graph is a tree as trees are easy to process if( $graph->edges != $graph->vertices - 1 ) { # If it is not a tree, than the graph has cycles, and we have to # do our best to recognise them. To make it easier, hydrogen atoms # are removed here for now. $graph->delete_vertices( grep { $_->{symbol} eq 'H' } $graph->vertices ); if( $graph->edges != $graph->vertices ) { die "cannot handle cycles with attachments for now\n"; } if( any { uc $_->{symbol} ne 'C' } $graph->vertices ) { die "cannot handle heterocycles for now\n"; } if( all { $_->{symbol} eq 'C' } $graph->vertices ) { # Cycloalkane detected return 'cyclo' . alkane_chain_name( scalar $graph->vertices ) . 'ane'; } if( ( all { $_->{symbol} eq 'c' } $graph->vertices ) && ( scalar $graph->vertices ) =~ /^(4|6|8|10|12|14|16)$/ ) { # Annulene detected return 'cyclo' . IUPAC_numerical_multiplier( scalar $graph->vertices, 1 ) . IUPAC_numerical_multiplier( scalar $graph->vertices / 2, 1 ) . 'ene'; } # No other types of graphs with cycles can be processed for now die "only limited set of homocycles is supported for now\n"; } # Hydrogen atoms are no longer important $graph->delete_vertices( grep { is_element( $_, 'H' ) } $graph->vertices ); # Check for unsupported elements. if( any { !is_element( $_, 'C' ) } $graph->vertices ) { die "cannot handle atoms other than C and H now\n"; } my( $order ) = select_mainchain( $graph->copy ); my @chain; for my $curr_vertex (@$order) { my( $vertex ) = grep { $_->{number} == $curr_vertex } $graph->vertices; push @chain, $vertex; } return get_mainchain_name( $graph->copy, ChemOnomatopist::Chain->new( $graph, undef, @chain ) ); } # BFS is performed for the given graph after all vertices that are not carbons # removed sub BFS_order_carbons_only { my( $graph, $start ) = @_; $graph = $graph->copy; $graph->delete_vertices( grep { !is_element( $_, 'C' ) } $graph->vertices ); my $bfs; if( $start ) { $bfs = Graph::Traversal::BFS->new( $graph, start => $start ); } else { $bfs = Graph::Traversal::BFS->new( $graph ); } return $bfs->bfs; } # Subroutine gets an graph, removes all vertices that do not have C as their element. # Performs BFS on that chain. During BFS, distance from start is calculated to each vertice sub BFS_order_carbons_only_return_lengths { my ( $graph, $start ) = @_; my $carbon_graph = $graph->copy; $carbon_graph->delete_vertices( grep { !is_element( $_, 'C') } $carbon_graph->vertices ); my $is_any_visited; my %lengths; my $bfs = Graph::Traversal::BFS->new( $carbon_graph, pre => sub { if( !$is_any_visited ) { $lengths{$_[0]->{number}} = 0; $is_any_visited = 1; } }, tree_edge => sub { if( !defined $lengths{$_[0]->{number}} ) { ( $_[0], $_[1] ) = ( $_[1], $_[0] ); } $lengths{$_[1]->{number}} = $lengths{$_[0]->{number}} + 1; }, start => $start ); return \%lengths, $bfs->bfs; } # Returns main (parental) chain to be used during the naming sub select_mainchain { my( $graph ) = @_; my @order = BFS_order_carbons_only( $graph ); my $start = $order[-1]; my( $lengths, @second_order ) = BFS_order_carbons_only_return_lengths( $graph, $start ); my $end = $second_order[-1]; # Finding all farthest vertices from the starting point my @farthest = grep { $lengths->{$_} eq $lengths->{$end->{number}} } keys %$lengths; # Also adding the first vertice to the array since it is farthest from other # ones push @farthest, $start->{number}; # Make a carbon-only graph my $carbon_graph = $graph->copy; $carbon_graph->delete_vertices( grep { !is_element( $_, 'C' ) } $carbon_graph->vertices ); # Going through every vertice in "farthest" array and creating tree-like structures my @all_trees; for (my $i = 0; $i < scalar @farthest; $i++) { my %tree = ( $farthest[$i] => [ $farthest[$i], 0 ] ); my( $vertex ) = grep { $_->{number} eq $farthest[$i] } $carbon_graph->vertices; # Start creation of the tree from all the starting vertices push @all_trees, \%{create_tree( $carbon_graph->copy, $vertex, \%tree )}; } my $trees; # Extracts arrays of all longest chains with numbers of vertices (in order) # from each tree-like structure my @main_chains = prepare_paths( @all_trees ); # From all possible main chains in tree-like structures, subroutine returns # the ones that has the greatest number of side chains. Also returns only the # trees that still have some possible main chains after selection ( $trees, @main_chains ) = rule_greatest_number_of_side_chains( $carbon_graph->copy, \@main_chains, @all_trees, ); return @{$main_chains[0]} if scalar @{$main_chains[0]} == 1; # If more than one chain is left, second rule is applied. # From all main chains left, subroutine selects all that have the # lowest numbered locants. Also the trees that have possible main # chains returned ( $trees, @main_chains ) = rule_lowest_numbered_locants( $carbon_graph->copy, @main_chains, @$trees, ); return @{$main_chains[0]} if scalar @{$main_chains[0]} == 1; # If more than one chain is left, third rule is applied. # From all main chains left, subroutine selects all that have the # most carbons in side chains. Also the trees that have possible main # chains returned ( $trees, @main_chains ) = rule_most_carbon_in_side_chains( $carbon_graph->copy, @main_chains, @$trees, ); return @{$main_chains[0]} if scalar @{$main_chains[0]} == 1; # If more than one chain is left, fourth rule is applied. # From all main chains left, subroutine selects all that have # the least branched side chains. Also the trees that have # possible main chains returned ( $trees, @main_chains ) = rule_least_branched_side_chains( $carbon_graph->copy, @main_chains, @$trees, ); return @{$main_chains[0]} if scalar @{$main_chains[0]} == 1; # If more than one chain is left, program states that there are # few chains that are identical by all the rules and selects # one from all that are left. # One main chain is picked from all that are left my $main_chain = rule_pick_chain_from_valid( $carbon_graph->copy, @main_chains, @$trees, ); return $main_chain; } # Creating tree like structure for all the longest paths in molecule sub create_tree { my( $graph, $atom, $tree ) = @_; my @neighbours = $graph->neighbours( $atom ); my @array = @{$tree->{$atom->{number}}}; my @new_array = ($atom->{number}); # Since first number in the stack-of-array boxes represents the parent of # the vertice, array with box information is shifted shift @array; # Box for the vertice is created by increasing box information in parental # box by one push @new_array, map { $_ + 1 } @array; # If there is one neighbour, it means that vertice do not have any branching. # Analysis of next vertice (the neighbour) is started if( scalar @neighbours == 1 ) { unless (exists $tree->{$neighbours[0]->{number}}) { $tree->{$neighbours[0]->{number}} = [ @new_array ]; $graph->delete_vertex( $atom ); create_tree( $graph, $neighbours[0], $tree ); } } # If there is more than one neighour for the current vertice, analysis of # each of them is started independently elsif( scalar @neighbours > 1 ) { push @new_array, 0; $graph->delete_vertex( $atom ); foreach my $neighbour ( @neighbours ) { $tree->{$neighbour->{number}} = [ @new_array ]; create_tree( $graph, $neighbour, $tree ); } } # If there is no neighbour or other vertices, all graph has been analyzed. # Created structure is returned return $tree; } # Create arrays of vertice numbers of all possible longest chains in the tree sub prepare_paths { my( @trees ) = @_; my $trees_copy = clone \@trees; my @all_chains; foreach my $tree ( @$trees_copy ) { my %structure = %$tree; # Tree-like structure is sorted by parental vertex number my @sorted = sort { $structure{$a}->[1] <=> $structure{$b}->[1] } keys %structure; my $last = $sorted[-1]; my @chain_ending = grep { $structure{$_}->[1] == $structure{$last}->[1] } keys %structure; foreach my $ending ( @chain_ending ) { push @all_chains, save_main_chain_vertices_in_array( $ending, [$ending], $tree ); } } # Adds reverted chains if they are not present yet as the longest chains for my $chain (@all_chains) { next if array_exists( [reverse @$chain], @all_chains ); push @all_chains, [reverse @$chain]; } return @all_chains; } # Checks if array exists in array of arrays sub array_exists { my( $chain, @all_chains ) = @_; my $same; for my $curr_chain ( @all_chains ) { $same = 1; for my $index ( 0..scalar @{$curr_chain}-1 ) { if( $chain->[$index] != $curr_chain->[$index] ) { $same = 0; last; } } return 1 if $same; } return 0; } # Tries to find the chain which has the greatest number of side chains sub rule_greatest_number_of_side_chains { my( $graph, $chains, @trees ) = @_; my $trees_copy = clone \@trees; my $index = 0; my @number_of_side_chains; foreach my $tree ( @$trees_copy ) { my %structure = %{clone $tree}; # Reference to parental chain is removed from the boxes foreach my $key ( keys %structure ) { shift @{$structure{$key}}; } # Beginning of the structure is found. Then all chains that belongs to # the current tree are selected my @first = grep { $structure{$_}->[0] == 0 } keys %structure; my @chains_in_the_tree = grep { $_->[0] == $first[0] || $_->[-1] == $first[0] } @$chains; # Structure with index of the tree, beginning and ending of the chain, # number of side chains in the chain created for each chain for my $chain ( @chains_in_the_tree ) { push @number_of_side_chains, [$index, $chain->[0], $chain->[-1], find_number_of_side_chains( $graph, \@{$chain}, $tree ) ]; } $index++; } # All chains that have the biggest number of side chains are selected and returned my @sorted_numbers = sort { $a->[3] <=> $b->[3] } @number_of_side_chains; my $path_length = $sorted_numbers[-1][3]; my @biggest_number_of_side_chains = grep {$_->[3] == $path_length} @number_of_side_chains; my %seen; my @uniq_biggest_number_of_side_chains = grep { !$seen{$_->[0]}++ } @biggest_number_of_side_chains; my @result = @trees[map {$_->[0]} @uniq_biggest_number_of_side_chains]; my @eligible_chains; for my $chain (@$chains) { if( any {$_->[1] == $chain->[0] && $_->[2] == $chain->[-1]} @biggest_number_of_side_chains ) { push @eligible_chains, $chain; } } return \@result, \@eligible_chains; } # Tries to find the chain which has the lowest-numbered locants sub rule_lowest_numbered_locants { my ( $graph, $chains, @trees ) = @_; my $trees_copy = clone \@trees; my $index = 0; my @locant_placing; foreach my $tree ( @$trees_copy ) { my %structure = %{clone $tree}; # Reference to parental chain is removed from the boxes foreach my $key ( keys %structure ) { shift @{$structure{$key}}; } # Beginning of the structure is found. Then all chains that belongs to # the current tree are selected my @first = grep { $structure{$_}->[0] == 0 } keys %structure; my @chains_in_the_tree = grep { @{$_}[0] == $first[0] || @{$_}[-1] == $first[0] } @$chains; # Structure with index of the tree, beginning and ending of the chain, # places of the locants in the chain created for each tree for my $chain ( @chains_in_the_tree ) { push @locant_placing, [$index, $chain->[0], $chain->[-1], [find_locant_placing( $graph, $chain, )] ]; } $index++; } # All chains that have the lowest numbers of locants are selected and returned my @sorted_paths = sort compare_locant_placings reverse @locant_placing; my $lowest_locants = $sorted_paths[0][3]; my @lowest_locants_paths = grep { join( '', @{$_->[3]} ) eq join( '', @$lowest_locants ) } @locant_placing; my %seen; my @uniq_lowest_locants_paths = grep { !$seen{$_->[0]}++ } @lowest_locants_paths; my @result = @trees[map {$_->[0]} @uniq_lowest_locants_paths]; my @eligible_chains; for my $chain (@$chains) { if( any { $_->[1] == $chain->[0] and $_->[2] == $chain->[-1] } @lowest_locants_paths) { push @eligible_chains, $chain; } } return \@result, \@eligible_chains; } # Tries to find chain that has the greatest number of carbon atoms in the smaller # side chains sub rule_most_carbon_in_side_chains { my ( $graph, $chains, @trees ) = @_; my $trees_copy = clone \@trees; my @side_chain_lengths; my $index = 0; foreach my $tree (@$trees_copy) { my %structure = %{clone $tree}; my @all_vertices = keys %structure; # Reference to parental chain is removed from the boxes foreach my $key (keys %structure) { shift @{$structure{$key}}; } my @sorted = sort { @{$structure{$a}} <=> @{$structure{$b}} or $structure{$a}->[0] cmp $structure{$b}->[0] } keys %structure; my $last = $sorted[-1]; # Beginning of the structure is found. Then all chains that belongs to # the current tree are selected my( $first ) = grep { $structure{$_}->[0] == 0 } keys %structure; my @structure_chains = grep { $_->[0] == $first || $_->[-1] == $first } @$chains; # Structure with index of the tree, beginning and ending of the chain, # lengths of side chains of the chain created for each tree for my $chain (@structure_chains) { push @side_chain_lengths, [$index, $chain->[0], $chain->[-1], [find_lengths_of_side_chains( $graph->copy, $chain->[-1], \@{$chain}, [], $tree, scalar @{$chain} )] ]; } $index++; } # All chains that have the highest number of carbons in side chains are selected and returned my @sorted_final = sort compare_locant_placings @side_chain_lengths; my $last = $sorted_final[-1][3]; my @greatest_no_of_side_chains_paths = grep { join( '', @{$_->[3]} ) eq join( '', @$last ) } @sorted_final; my @eligible_chains; for my $chain (@{$chains}){ if( any { $_->[1] == $chain->[0] && $_->[2] == $chain->[-1] } @greatest_no_of_side_chains_paths ) { push @eligible_chains, $chain; } } my %seen; my @uniq_side_chain_paths = grep { !$seen{$_->[0]}++ } @greatest_no_of_side_chains_paths; my @result = @trees[map { $_->[0] } @uniq_side_chain_paths]; return \@result, \@eligible_chains; } # Tries to find chain that have the least branched side chains sub rule_least_branched_side_chains { my ( $graph, $chains, @trees ) = @_; my $trees_copy = clone \@trees; my $index = 0; my @number_of_branched_side_chains; foreach my $tree (@$trees_copy) { my %structure = %{clone $tree}; # Reference to parental chain is removed from the boxes foreach my $key (keys %structure) { shift @{$structure{$key}}; } # Beginning of the structure is found. Then all chains that belong to # the current tree are selected my( $first ) = grep { $structure{$_}->[0] == 0 } keys %structure; my @chains_in_the_tree = grep { $_->[0] == $first || $_->[-1] == $first } @$chains; for my $chain (@chains_in_the_tree) { push @number_of_branched_side_chains, [$index, $chain->[0], $chain->[-1], find_number_of_branched_side_chains( $graph, \@{$chain}, $tree ) ]; } $index++; } # All chains that have the least amount of branches side chains are selected and returned my @sorted_paths = sort { $a->[3] <=> $b->[3] } @number_of_branched_side_chains; my $path_length = $sorted_paths[0][3]; my @longest_paths = grep { $_->[3] == $path_length } @number_of_branched_side_chains; my %seen; my @uniq_longest_paths = grep { !$seen{$_->[0]}++ } @longest_paths; my @result = @trees[map { $_->[0] } @uniq_longest_paths]; my @eligible_chains; for my $chain (@$chains) { if( any { $_->[1] == $chain->[0] && $_->[2] == $chain->[-1] } @longest_paths ){ push @eligible_chains, $chain; } } return \@result, \@eligible_chains; } # Subroutine sorts all valid chains that are left and returns the first one - # the one that have carbons with lowest indexes if there is no differences # regarding the attachment names. If there is, then selects the ones that have # lowest attachment indexes with lowest attachments alphabetically sub rule_pick_chain_from_valid { my( $graph, $chains, @trees ) = @_; my( $chain ) = sort { cmp_arrays( $a, $b ) } pick_chain_with_lowest_attachments_alphabetically( $graph, $chains, @trees ); return $chain; } # Subroutine selects chain that has the lowest attachments by alpabetical naming sub pick_chain_with_lowest_attachments_alphabetically { my( $graph, $chains, @trees ) = @_; # Locant placements are found for all trees my @attachments; for my $i (0..$#trees) { my %structure = %{clone $trees[$i]}; # Reference to parental chain is removed from the boxes for my $key (keys %structure) { shift @{$structure{$key}}; } # Beginning of the structure is found. Then all chains that belongs to # the current tree are selected my( $first ) = grep { $structure{$_}->[0] == 0 } keys %structure; my @chains_in_the_tree = grep { $_->[0] == $first || $_->[-1] == $first } @$chains; # Placings of the locants found for each chain for my $chain (@chains_in_the_tree) { my @attachments_only; for my $locant (uniq find_locant_placing( $graph, $chain )) { my( $vertex ) = grep { $_->{number} == $chain->[$locant-1] } $graph->vertices; # Cycle through non-mainchain neighbours: for my $neighbour ($graph->neighbours( $vertex )) { next if any { $neighbour->{number} eq $_ } @$chain; # Find the name for a sidechain my $graph_copy = $graph->copy; $graph_copy->delete_edge( $vertex, $neighbour ); my $attachment_name = get_sidechain_name( $graph_copy, $vertex, $neighbour ); $attachment_name = "($attachment_name)" if $attachment_name =~ /^[0-9]/; push @attachments_only, $attachment_name; } } push @attachments, [clone( $chain ), \@attachments_only]; } } # All chains that have the same - alpabetically lowest attachments selected my( $best ) = sort { cmp_attachments( $a->[1], $b->[1] ) } @attachments; my @correct_chains_all = grep { join( ',', @{$_->[1]} ) eq join( ',', @{$best->[1]} ) } @attachments; return map { $_->[0] } @correct_chains_all; } # Returns array that contains numbers of vertices that are in main chain sub save_main_chain_vertices_in_array { my( $curr_vertex, $all_vertices, $structure ) = @_; if( $structure->{$curr_vertex}[0] == $curr_vertex ) { return $all_vertices; } else { push @$all_vertices, $structure->{$curr_vertex}[0]; return save_main_chain_vertices_in_array( $structure->{$curr_vertex}->[0], $all_vertices, $structure ); } } # Returns array that contains lengths of all side chains sub find_lengths_of_side_chains { my( $graph, $curr_vertex, $main_chain_vertices, $side_chain_lengths, $structure, $atoms_left ) = @_; return sort @$side_chain_lengths unless $atoms_left; my @vertices = $graph->vertices; my( $vertex ) = grep { $_->{number} == $curr_vertex } @vertices; my @curr_neighbours = $graph->neighbours( $vertex ); if( scalar @curr_neighbours == 1 ) { $graph->delete_vertex( $vertex ); $atoms_left--; find_lengths_of_side_chains( $graph, $curr_neighbours[0]->{number}, $main_chain_vertices, $side_chain_lengths, $structure, $atoms_left ); } else { my @side_chain_neighbours; my $next_chain_vertex; # Find all neighours of the chain that does not exist in main chain and # the next chain to be analyzed foreach my $neigh ( @curr_neighbours ) { if( any { $neigh->{number} eq $_ } @$main_chain_vertices ) { $next_chain_vertex = $neigh; } else { push @side_chain_neighbours, $neigh; } } $graph->delete_vertex( $vertex ); $atoms_left--; # For each side chain neighbour, find their chain lengths foreach my $neighbour ( @side_chain_neighbours ) { push @$side_chain_lengths, BFS_calculate_chain_length( $graph, $neighbour ); } find_lengths_of_side_chains( $graph, $next_chain_vertex->{number}, $main_chain_vertices, $side_chain_lengths, $structure, $atoms_left ); } } # TODO: Try to merge all subroutines # Find placings of all locants in the chain. # Returns an array of indices denoting locant placements. # This code treats vertices with degrees larger than 2 as having sidechain attachments. sub find_locant_placing { my( $graph, $main_chain ) = @_; # Indices received instead of vertices, transform them. # This later on should be removed. if( @$main_chain && !ref $main_chain->[0] ) { my %vertices_by_id = map { ( $_->{number} => $_ ) } $graph->vertices; $main_chain = [ map { $vertices_by_id{$_} } @$main_chain ]; } # Visit all attachments and memorize their attachment positions my @locants; for my $i (0..$#$main_chain) { my $vertex = $main_chain->[$i]; next unless $graph->degree( $vertex ) > 2; push @locants, ( $i ) x ( $graph->degree( $vertex ) - 2 ); } return @locants; } # Returns number of side chains sub find_number_of_side_chains { my( $graph, $main_chain, $structure ) = @_; # Code is destructive, need to make a copy before execution: $graph = $graph->copy; my @vertices = $graph->vertices; my $number_of_side_chains = 0; for my $curr_vertex ( reverse @$main_chain ) { my( $vertex ) = grep { $_->{number} == $curr_vertex } @vertices; my @curr_neighbours = $graph->neighbours( $vertex ); return $number_of_side_chains unless scalar @curr_neighbours; $graph->delete_vertex( $vertex ); if( scalar @curr_neighbours > 1 ) { foreach my $neigh (@curr_neighbours) { next if any { $neigh->{number} eq $_ } @$main_chain; $number_of_side_chains++; } } } } sub find_number_of_branched_side_chains { my( $graph, $main_chain, $structure ) = @_; # Code is destructive, need to make a copy before execution: $graph = $graph->copy; my @vertices = $graph->vertices; my $number_of_branched_side_chains = 0; for my $curr_vertex ( reverse @$main_chain ) { my( $vertex ) = grep {$_->{number} == $curr_vertex} @vertices; my @curr_neighbours = $graph->neighbours( $vertex ); return $number_of_branched_side_chains unless scalar @curr_neighbours; $graph->delete_vertex( $vertex ); if( scalar @curr_neighbours > 1 ) { foreach my $neigh (@curr_neighbours) { next if any { $neigh->{number} eq $_ } @$main_chain; $number_of_branched_side_chains += BFS_is_chain_branched( $graph, $neigh ); } } } } # Sorts locant placings from lowest to largest # This had code identical to compare_side_chain_lengths(), thus calls to the latter have been redirected here. sub compare_locant_placings { my @A = @$a; my @B = @$b; if( @A >= 4 && ref $A[3] ) { # This is the "old" data structure @A = @{$A[3]}; @B = @{$B[3]}; } for (0..$#A) { return $A[$_] <=> $B[$_] if $A[$_] <=> $B[$_]; } return 0; } 1; ChemOnomatopist-0.6.1/lib/ChemOnomatopist/Util.pm000066400000000000000000000015231452012116100217530ustar00rootroot00000000000000package ChemOnomatopist::Util; use strict; use warnings; # ABSTRACT: Generic utilities # VERSION use Exporter; use Graph::Undirected; use Scalar::Util qw( blessed ); use parent Exporter::; our @EXPORT_OK = qw( copy zip ); sub copy($) { my( $object ) = @_; die "can only copy Graph now\n" unless blessed $object && $object->isa( Graph::Undirected:: ); # Graphs have to be copied with the following code as Graph::copy() loses attributes. my $graph = $object; my $copy = $graph->copy; for my $edge ($graph->edges) { next unless $graph->has_edge_attributes( @$edge ); $copy->set_edge_attributes( @$edge, $graph->get_edge_attributes( @$edge ) ); } return $copy; } sub zip(@) { die "odd input to zip\n" if @_ % 2; my $N = @_ / 2; return map { $_[$_], $_[$_ + $N] } 0..$N-1; } 1; ChemOnomatopist-0.6.1/lib/ChemOnomatopist/Util/000077500000000000000000000000001452012116100214145ustar00rootroot00000000000000ChemOnomatopist-0.6.1/lib/ChemOnomatopist/Util/Graph.pm000066400000000000000000000253351452012116100230230ustar00rootroot00000000000000package ChemOnomatopist::Util::Graph; use strict; use warnings; # ABSTRACT: Generic graph utilities # VERSION use ChemOnomatopist::Util qw( copy ); use Exporter; use Graph::Traversal::BFS; use List::Util qw( any sum0 ); use Set::Object qw( set ); use parent Exporter::; our @EXPORT_OK = qw( BFS_calculate_chain_length BFS_is_chain_branched cyclic_components graph_center graph_cycle_core graph_cycles graph_has_cycle graph_longest_paths graph_longest_paths_from_vertex graph_path_between_vertices graph_replace graph_without_edge_attributes merge_graphs subgraph tree_branch_positions tree_number_of_branches ); # Calculates length of given graph (vertices count) sub BFS_calculate_chain_length { my( $graph, $start ) = @_; my $bfs = Graph::Traversal::BFS->new( $graph, start => $start ); return scalar $bfs->bfs; } # Returns 1 if there is any braches in the given graph and if there is none sub BFS_is_chain_branched { my( $graph, $start ) = @_; # FIXME: Not entirely sure why visited vertices are removed (A.M.) my $graph_copy = $graph->copy; my $branched = 0; my $bfs = Graph::Traversal::BFS->new( $graph, pre => sub { my @neighbours = $graph_copy->neighbours( $_[0] ); $branched = 1 if scalar @neighbours > 1; $graph_copy->delete_vertex( $_[0] ); }, start => $start ); $bfs->bfs; return $branched; } sub cyclic_components { my( $graph ) = @_; $graph = copy $graph; # Due to the issue in Graph, bridges() returns strings instead of real objects. # Graph issue: https://github.com/graphviz-perl/Graph/issues/29 my %vertices_by_name = map { $_ => $_ } $graph->vertices; $graph->delete_edges( map { map { $vertices_by_name{$_} } @$_ } $graph->bridges ); $graph->delete_vertices( grep { !$graph->degree( $_ ) } $graph->vertices ); return () unless $graph->vertices; # No vertices = no cycles return map { subgraph( $graph, @$_ ) } $graph->connected_components; } # Find how many side attachments are at every position of the given path. sub graph_attachment_positions { my( $graph, @vertices ) = @_; $graph = $graph->copy; $graph->delete_path( @vertices ); return map { $graph->degree( $_ ) } @vertices; } # Finds center (or two centers) of a tree graph. # Returns one or two vertices constituting the center. sub graph_center { my( $graph ) = @_; $graph = $graph->copy; my $nvertices = scalar $graph->vertices; while( $graph->vertices > 2 ) { $graph->delete_vertices( grep { $graph->degree( $_ ) == 1 } $graph->vertices ); my $nvertices_now = scalar $graph->vertices; if( $nvertices_now == $nvertices ) { # Safeguard for cycles and/or isolated vertices die 'cannot find center of cyclic or isolated graphs'; } $nvertices = $nvertices_now; } return $graph->vertices; } # Iteratively removes leaves of a graph until cycle core remains. sub graph_cycle_core { my( $graph ) = @_; $graph = $graph->copy; while( my @leaves = grep { $graph->degree( $_ ) == 1 } $graph->vertices ) { $graph->delete_vertices( @leaves ); } return $graph; } # Decomposes bridgeless graph into cycles # FIXME: Experimental sub graph_cycles { my( $graph ) = @_; $graph = copy $graph; my @cycles; while( $graph->vertices ) { my $triple_connected = set( grep { $graph->degree( $_ ) == 3 } $graph->vertices ); if( !@$triple_connected ) { push @cycles, $graph; last; } my @chords = grep { $triple_connected->has( $_->[0] ) && $triple_connected->has( $_->[1] ) } $graph->edges; my $wo_chords = copy( $graph )->delete_vertices( @$triple_connected ); for my $component ($wo_chords->connected_components) { next if @$component == 1; my @ends = grep { $wo_chords->degree( $_ ) == 1 } @$component; # Should be two my( $chord ) = grep { ($graph->has_edge( $_->[0], $ends[0] ) && $graph->has_edge( $_->[1], $ends[1] )) || ($graph->has_edge( $_->[0], $ends[1] ) && $graph->has_edge( $_->[1], $ends[0] )) } @chords; next unless $chord; # Found a chord which completes a cycle my $vertices = set( @$component, @$chord ); my $cycle = copy( $graph )->delete_vertices( grep { !$vertices->has( $_ ) } $graph->vertices ); push @cycles, $cycle; $graph->delete_vertices( @$component ); } } return @cycles; } sub graph_has_cycle { my( $graph ) = @_; return $graph->edges > $graph->vertices - 1; } # Finds longest paths in a tree graph. # The subroutine finds all longest paths originating at graph center(s) and produces all their combinations. # No two paths containing the same vertices are returned. sub graph_longest_paths { my( $graph ) = @_; my @centers = graph_center( $graph ); my @longest_paths; if( @centers == 1 ) { # Single-centered graph # Removing the center from longest path parts, to be added later my @longest_path_parts = map { [ @{$_}[1..$#$_] ] } graph_longest_paths_from_vertex( $graph, $centers[0] ); for my $i (0..$#longest_path_parts) { for my $j (0..$#longest_path_parts) { next if $i >= $j; # Ensure that two paths do not start at the same vertex. next if $longest_path_parts[$i]->[0] eq $longest_path_parts[$j]->[0]; push @longest_paths, [ reverse( @{$longest_path_parts[$i]} ), $centers[0], @{$longest_path_parts[$j]} ]; } } } else { # Double-centered graph $graph = $graph->copy; $graph->delete_edge( @centers ); my @longest_path_parts1 = graph_longest_paths_from_vertex( $graph, $centers[0] ); my @longest_path_parts2 = graph_longest_paths_from_vertex( $graph, $centers[1] ); for my $i (0..$#longest_path_parts1) { for my $j (0..$#longest_path_parts2) { push @longest_paths, [ reverse( @{$longest_path_parts1[$i]} ), @{$longest_path_parts2[$j]} ]; } } } return @longest_paths; } # Finds all the longest paths from given vertex sub graph_longest_paths_from_vertex { my( $graph, $vertex ) = @_; my %from = ( $vertex => undef ); my %length = ( $vertex => 0 ); my $bfs = Graph::Traversal::BFS->new( $graph, tree_edge => sub { my( $u, $v ) = @_; ( $u, $v ) = ( $v, $u ) if exists $from{$v}; $from{$v} = $u; $length{$v} = $length{$u} + 1; }, start => $vertex, ); $bfs->bfs; my @furthest_leaves; my $furthest_distance = 0; for my $vertex ( $graph->vertices ) { next unless exists $length{$vertex}; # May happen in disconnected graphs if( $length{$vertex} < $furthest_distance ) { next; } elsif( $length{$vertex} == $furthest_distance ) { push @furthest_leaves, $vertex; } else { @furthest_leaves = ( $vertex ); $furthest_distance = $length{$vertex}; } } # Backtrack starting from the furthest leaves to collect the longest # paths. In the returned result path, starting vertex is the first. my @longest_paths; for my $vertex ( @furthest_leaves ) { my @path; while( defined $vertex ) { push @path, $vertex; $vertex = $from{$vertex}; } push @longest_paths, [ reverse @path ]; } return @longest_paths; } # Replace one or more old vertices with a single new one sub graph_replace { my( $graph, $new, @old ) = @_; $graph->add_vertex( $new ); my $old = set( @old ); for my $edge (grep { ($old->has( $_->[0] ) && !$old->has( $_->[1] )) || ($old->has( $_->[1] ) && !$old->has( $_->[0] )) } $graph->edges) { my( $vertex, $neighbour ) = $old->has( $edge->[0] ) ? @$edge : reverse @$edge; next if $graph->has_edge( $new, $neighbour ); $graph->add_edge( $new, $neighbour ); next unless $graph->has_edge_attributes( @$edge ); $graph->set_edge_attributes( $new, $neighbour, $graph->get_edge_attributes( @$edge ) ); } $graph->delete_vertices( @old ); return $graph; } sub graph_without_edge_attributes($) { my( $graph ) = @_; $graph = $graph->copy; for ($graph->edges) { $graph->delete_edge_attributes( @$_ ); } return $graph; } sub merge_graphs { my( $A, $B, $A_vertex, $B_vertex ) = @_; my $merged = copy $A; for my $edge ($B->edges) { $merged->add_edge( @$edge ); next unless $B->has_edge_attributes( @$edge ); $merged->set_edge_attributes( @$edge, $B->get_edge_attributes( @$edge ) ); } $merged->add_edge( $A_vertex, $B_vertex ) if $A_vertex && $B_vertex; return $merged; } sub subgraph { my( $graph, @vertices ) = @_; my $vertices = set( @vertices ); $graph = copy $graph; $graph->delete_vertices( grep { !$vertices->has( $_ ) } $graph->vertices ); return $graph; } # Given a tree and a path, finds the number of branches branching off the given path. # It is equal to the sum of all degrees minus the edges between vertices in a path. sub tree_number_of_branches { my( $tree, @vertices ) = @_; return sum0( map { $tree->degree( $_ ) } @vertices ) - 2 * (scalar @vertices - 1); } # Returns a list of 0-based indices of branch positions. sub tree_branch_positions { my( $tree, @vertices ) = @_; return map { ( $_ ) x ( $tree->degree( $vertices[$_] ) - 2 ) } grep { $tree->degree( $vertices[$_] ) > 2 } 0..$#vertices; } # Find a path between two vertices in an acyclic graph. sub graph_path_between_vertices { my( $graph, $A, $B ) = @_; if( graph_has_cycle( $graph ) ) { die "cannot call graph_path_between_vertices() on graph with cycles\n"; } $graph = $graph->copy; while( my @leaves = grep { $graph->degree( $_ ) == 1 && $_ != $A && $_ != $B } $graph->vertices ) { $graph->delete_vertices( @leaves ); } # No edges means that path does not exist between these two vertices return () unless $graph->edges; my @path; my $vertex = $A; while( $vertex ) { push @path, $vertex; my( $vertex_now ) = $graph->neighbours( $vertex ); $graph->delete_vertex( $vertex ); $vertex = $vertex_now; } return @path; } 1; ChemOnomatopist-0.6.1/lib/ChemOnomatopist/Util/SMILES.pm000066400000000000000000000026321452012116100227510ustar00rootroot00000000000000package ChemOnomatopist::Util::SMILES; use strict; use warnings; # ABSTRACT: SMILES utilities # VERSION use parent Exporter::; our @EXPORT_OK = qw( cycle_SMILES cycle_SMILES_explicit path_SMILES ); sub cycle_SMILES { my( $graph, @cycle ) = @_; my $SMILES = cycle_SMILES_explicit( $graph, @cycle ); $SMILES =~ s/-//g; return $SMILES; } sub cycle_SMILES_explicit { my( $graph, @cycle ) = @_; my $SMILES = ''; for my $i (0..$#cycle) { my $symbol = $cycle[$i]->{symbol}; $symbol = "[$symbol]" unless $symbol =~ /^[bcnosp]$/i || $symbol =~ /^(F|Cl|Br|I|\*)$/; $SMILES .= $symbol; if( $graph->has_edge_attribute( $cycle[$i], $cycle[($i+1) % scalar @cycle], 'bond' ) ) { $SMILES .= $graph->get_edge_attribute( $cycle[$i], $cycle[($i+1) % scalar @cycle], 'bond' ); } else { $SMILES .= '-'; } } return $SMILES; } sub path_SMILES { my( $graph, @path ) = @_; my $SMILES = ''; for my $i (0..$#path) { my $symbol = $path[$i]->{symbol}; $symbol = "[$symbol]" unless $symbol =~ /^[bcnosp]$/i || $symbol =~ /^(F|Cl|Br|I|\*)$/; $SMILES .= $symbol; next if $i == $#path; next unless $graph->has_edge_attribute( $path[$i], $path[$i+1], 'bond' ); $SMILES .= $graph->get_edge_attribute( $path[$i], $path[$i+1], 'bond' ); } return $SMILES; } 1; ChemOnomatopist-0.6.1/links.md000066400000000000000000000007541452012116100162700ustar00rootroot00000000000000To find the longest chain, longest path has to be established in the tree for tree compounds: * https://www.geeksforgeeks.org/longest-path-undirected-tree/ Story for the name of the project: * https://english.stackexchange.com/questions/430516/is-there-a-word-for-the-giver-of-names BSc thesis of MiglÄ— UrbonaitÄ— on ChemOnomatopist (in Lithuanian): * https://www.lvb.lt/permalink/f/8og6ah/ELABAETD146240727 Blue Book v2 (BBv2): * https://iupac.qmul.ac.uk/BlueBook/PDF/BlueBookV2.pdf ChemOnomatopist-0.6.1/scripts/000077500000000000000000000000001452012116100163075ustar00rootroot00000000000000ChemOnomatopist-0.6.1/scripts/make-test000077500000000000000000000017651452012116100201400ustar00rootroot00000000000000#!/usr/bin/perl use strict; use warnings; use File::Basename qw( basename ); use IPC::Run3; use Getopt::Long::Descriptive; my $basename = basename $0; my( $opt, $usage ) = describe_options( <<"END" . 'OPTIONS', USAGE $basename [] [] DESCRIPTION $basename prepares tests for ChemOnomatopist END [ 'pubchem', 'treat the input as three-columned PubChem file' ], [], [ 'help', 'print usage message and exit', { shortcircuit => 1 } ], ); if( $opt->help ) { print $usage->text; exit; } while( <> ) { s/^\s+//; s/\s+$//; my( $smiles, $iupac, $source ); if( $opt->pubchem ) { ( my $id, $iupac, $smiles ) = split /\t/, $_; $source = "PubChem $id"; } else { my( $stderr ); $iupac = $_; run3 [ 'java', '-jar', '/usr/share/java/opsin.jar' ], \$iupac, \$smiles, \$stderr; $smiles =~ s/\n$//; } print "{ smiles => '$smiles', iupac => '$iupac' },"; print " # $source" if $source; print "\n"; } ChemOnomatopist-0.6.1/t/000077500000000000000000000000001452012116100150635ustar00rootroot00000000000000ChemOnomatopist-0.6.1/t/01_alkane.t000066400000000000000000000104651452012116100170110ustar00rootroot00000000000000#!/usr/bin/perl use strict; use warnings; use ChemOnomatopist; use ChemOnomatopist::Old; use Test::More; my @cases = ( { smiles => 'CCCC', iupac => 'butane' }, { smiles => 'CCCCC', iupac => 'pentane' }, { smiles => 'CC(C)C', iupac => '2-methylpropane' }, { smiles => 'C1CCC1', iupac => 'cyclobutane' }, { smiles => 'c1ccccccccccccc1', iupac => 'cyclotetradecaheptaene' }, { smiles => 'CCCCCCCCCCC(C)CCCC', iupac => '5-methylpentadecane' }, { smiles => 'CC(C)CC(CCC(C)C)C', iupac => '2,4,7-trimethyloctane' }, { smiles => 'CCC(C)(C)CCCCCC(C)C', iupac => '2,8,8-trimethyldecane' }, { smiles => 'C(C)C(CCC(CCC(C)C)(C)C)C', iupac => '2,5,5,8-tetramethyldecane' }, { smiles => 'C(C)C(CC(C)C)CC', iupac => '4-ethyl-2-methylhexane' }, { smiles => 'C(C)C(C(CC)(C)C)CCC', iupac => '4-ethyl-3,3-dimethylheptane' }, { smiles => 'CCCC(CCC)(C(C)C)C(C)C', iupac => '4,4-di(propan-2-yl)heptane' }, { smiles => 'CCCC(CCC)C(C)C', iupac => '4-propan-2-ylheptane', AUTHOR => 1 }, # PubChem 142981 { smiles => 'CCCCC(CCCC)C(C)C(C)C', iupac => '5-(3-methylbutan-2-yl)nonane' }, { smiles => 'CCCCCC(CC(C)CC)CC(CCCCC)C(C)CCC', iupac => '6-(2-methylbutyl)-8-pentan-2-yltridecane', AUTHOR => 1 }, # PubChem 54207584 { smiles => 'CCCCCCCCCCCCCCCCCCCCCCCCCCCCCC(CCCCCCCCCC)CCCCCCCCCCCCC', iupac => '14-decyltritetracontane' }, { smiles => 'CCCCCCCC(CCCCCCC)(CCCCCCC)C(C)CC', iupac => '8-butan-2-yl-8-heptylpentadecane', AUTHOR => 1 }, # PubChem 154365261 { smiles => 'CCC(C)C(CC(C(C)CC)C(C)CC)C(C)CC', iupac => '4,6-di(butan-2-yl)-3,7-dimethylnonane' }, { smiles => 'CCCCCCCCCCCCC(CCC)(CCC)C(C(C)(C)C)C(CCCC)(CCCCCC)C(CC)(CCC)C(C)CCCC', iupac => '7-butyl-8-tert-butyl-6-ethyl-7-hexyl-5-methyl-6,9,9-tripropylhenicosane', AUTHOR => 1 }, { smiles => 'CC(CC(CC(CCCC)CC)CCCCCC)CC(C)C', iupac => '7-(2,4-dimethylpentyl)-5-ethyltridecane' }, # BBv2 P-14.5.2 { smiles => 'C(C)C(C(CCC)C)(C(CCCC)C)C', iupac => '5-ethyl-4,5,6-trimethyldecane' }, { smiles => 'C(C)C(C(CC)C)C(C(CCC)(C)C)(CC)CC', iupac => '4,5,5-triethyl-3,6,6-trimethylnonane' }, { smiles => 'C(C)C(C(C(CCC)C)(CCC)CCC)(CCCC)CCC', iupac => '6-ethyl-4-methyl-5,5,6-tripropyldecane' }, { smiles => 'C(C)C(C(CCC)(C)C)(C(C(CCC)(C)CC)CCC)CCC', iupac => '5,7-diethyl-4,4,7-trimethyl-5,6-dipropyldecane' }, { smiles => 'CCC(CC)C(C)C', iupac => '3-ethyl-2-methylpentane' }, { smiles => 'CCCCCCCCCCCCCCCCCCCCCCC', iupac => 'tricosane' }, { smiles => 'CC(CC(CCC)CCC)C', iupac => '2-methyl-4-propylheptane' }, { smiles => 'CC(CC(CC(CC(CC)CC)C)(CC(CC(CC)C)C)CC(CC(CC)C)C)CC(CC)C', iupac => '7,7-bis(2,4-dimethylhexyl)-3-ethyl-5,9,11-trimethyltridecane', AUTHOR => 1 }, # Fails 'old' nondeterministically on some machines { smiles => 'CC(C(CCC)C)C(CC(CCCC)CC)CCCCCC', iupac => '5-ethyl-7-(3-methylhexan-2-yl)tridecane' }, { smiles => 'CCC(CC)CCC(CCC(CC)CC)CCC(CCC(CCC(CC)CC)CCC(CC)CC)CCC(CCC(CC)CC)CCC(CC)CC', iupac => '3,15-diethyl-9-[6-ethyl-3-(3-ethylpentyl)octyl]-6,12-bis(3-ethylpentyl)heptadecane', AUTHOR => 1 }, # different order { smiles => 'CC(C)CC(CC(C)C)CC(CC(C)C)CC(C)C', iupac => '2,8-dimethyl-4,6-bis(2-methylpropyl)nonane' }, { smiles => 'CCCCCCCCCC(CCCC)(CCCC)C(C)(C)C', iupac => '5-butyl-5-tert-butyltetradecane' }, { smiles => 'CCCCC(CC)C(CCCC)C(C)(C)C', iupac => '5-ethyl-6-tert-butyldecane' }, { smiles => 'CCCCC(CCCC)CCCCCCCC(C(C)CCC)C(C)(C)C', iupac => '5-tert-butyl-13-butyl-4-methylheptadecane', AUTHOR => 1 }, { smiles => 'CC(CC)C(CCCC)(CCCCCCCCCCCCCCCCCCCCCCCCCC)CCCC', iupac => '5-(butan-2-yl)-5-butylhentriacontane' }, # BBv2 P-14.5.1 { smiles => 'CCCCCC(C)(C)C(C)CC(CC)CC(CCC(CC)CC)C(CCC(CC)CCC)(CC(CC)CCCC)C(CC)(CCCC(CC)CC)C(CC(C)C(CC)CC)(CC(C)(CC)CCC)C(C)C(CC(C)CC(C)CC)(CC(C)C(CC)CCC)CC(C)(CC)CCCC', iupac => '10,14-diethyl-11-(2-ethylhexyl)-11-(3-ethylhexyl)-10-(4-ethylhexyl)-7-(2-ethyl-2-methylhexyl)-7-(3-ethyl-2-methylhexyl)-9-(2-ethyl-2-methylpentyl)-9-(3-ethyl-2-methylpentyl)-12-(3-ethylpentyl)-3,5,8,16,17,17-hexamethyldocosane' }, ); @cases = grep { !exists $_->{AUTHOR} } @cases unless $ENV{AUTHOR_TESTING}; plan skip_all => 'No available cases' unless @cases; plan tests => 2 * @cases; for my $case (@cases) { my $name = $case->{iupac}; is ChemOnomatopist::get_name( $case->{smiles} ), $name, 'new'; is ChemOnomatopist::Old::get_name( $case->{smiles} ), $name, 'old'; } ChemOnomatopist-0.6.1/t/02_cod.t000066400000000000000000000036141452012116100163220ustar00rootroot00000000000000#!/usr/bin/perl use strict; use warnings; use ChemOnomatopist; use DBI; use IPC::Run3; use Test::More; if( !$ENV{EXTENDED_TESTING} ) { plan skip_all => "Skip \$ENV{EXTENDED_TESTING} is not set\n"; } my $dbh = db_connect('mysql', 'www.crystallography.net', 'cod', 3306, 'cod_reader', ''); # FIXME: Skip tests if connection is unsuccessful. (A.M.) # FIXME: Skip tests if OPSIN is not installed. (A.M.) my $sth = $dbh->prepare( 'SELECT chemname, value AS smiles FROM data JOIN smiles ON file = cod_id WHERE chemname IS NOT NULL' ); $sth->execute; my %tests; while (my $item = $sth->fetchrow_hashref) { if ($item->{'smiles'} =~ /^[CchH\[\]\(\)\-\+]+$/) { $tests{$item->{'chemname'}} = $item->{'smiles'}; } } my %opsin_approved; for my $compound (keys %tests) { my( $smiles, $stderr ); run3 'java -jar /usr/share/java/opsin.jar', \"$compound\n", \$smiles, \$stderr; chomp $smiles; next unless $smiles; # Skipping names that were not understood by OPSIN if( $smiles eq $tests{$compound} ) { # Ensuring OPSIN approval $opsin_approved{$smiles} = $compound; } } plan tests => scalar keys %opsin_approved; for my $case (keys %opsin_approved) { # FIXME: Chemical name may have initial letter uppercased, but it may # not be the right choice to lowercase it before comparison. # Need to think a bit more on how to deal with it. (A.M.) is( ChemOnomatopist::get_name( $case ), $opsin_approved{$case} ); } sub db_connect { my ($db_platform, $db_host, $db_name, $db_port, $db_user, $db_pass) = @_; my $dsn = "dbi:$db_platform:hostname=$db_host;dbname=$db_name;" . "user=$db_user;password=$db_pass"; my $options = { PrintError => 0, mysql_enable_utf8 => 1 }; my $dbh = DBI->connect( $dsn, $db_user, $db_pass, $options ); die 'could not connect to the database - ' . lcfirst( $DBI::errstr ) unless $dbh; return $dbh } ChemOnomatopist-0.6.1/t/04_longest_paths.t000066400000000000000000000107761452012116100204400ustar00rootroot00000000000000#!/usr/bin/perl use strict; use warnings; use ChemOnomatopist; use ChemOnomatopist::Chain::FromHalves; use ChemOnomatopist::ChainHalf; use ChemOnomatopist::Util::Graph qw( graph_longest_paths graph_longest_paths_from_vertex ); use Graph::Undirected; use Test::More; sub chains_odd($@) { my( $graph, @halves ) = @_; @halves = map { ChemOnomatopist::ChainHalf->new( $graph, undef, @$_ ) } @halves; my @chains; for my $half1 (@halves) { for my $half2 (@halves) { next if $half1 == $half2; push @chains, ChemOnomatopist::Chain::FromHalves->new( $half1, $half2 ); } } return @chains; } sub chains_even($@) { my( $graph, @halves ) = @_; @halves = map { ChemOnomatopist::ChainHalf->new( $graph, shift @$_, @$_ ) } @halves; my @chains; for my $half1 (@halves) { for my $half2 (@halves) { next if $half1->group eq $half2->group; push @chains, ChemOnomatopist::Chain::FromHalves->new( $half1, $half2 ); } } return @chains; } plan tests => 12; my $graph; my @paths; $graph = Graph::Undirected->new; for (1..10) { $graph->add_edge( 0, $_ ); } is( scalar graph_longest_paths_from_vertex( $graph, 0 ), 10 ); is( scalar graph_longest_paths( $graph ), 45 ); $graph->add_edge( 1, 11 ); is( scalar graph_longest_paths_from_vertex( $graph, 0 ), 1 ); is( scalar graph_longest_paths( $graph ), 9 ); # Elongated X-shaped graph with an odd-numbered longest path $graph = Graph::Undirected->new; $graph->add_path( 'A'..'C' ); $graph->add_edge( 'A', 'A1' ); $graph->add_edge( 'A', 'A2' ); $graph->add_edge( 'C', 'C1' ); $graph->add_edge( 'C', 'C2' ); is scalar graph_longest_paths( $graph ), 4; @paths = ChemOnomatopist::rule_greatest_number_of_side_chains( chains_odd $graph, [ 'B', 'A', 'A1' ], [ 'B', 'C', 'C1' ] ); is scalar @paths, 2; @paths = ChemOnomatopist::rule_greatest_number_of_side_chains( chains_odd $graph, [ 'B', 'A', 'A1' ], [ 'B', 'A', 'A2' ], [ 'B', 'C', 'C1' ] ); is scalar @paths, 6; # Elongated X-shaped graph with an even-numbered longest path $graph = Graph::Undirected->new; $graph->add_path( 'A'..'D' ); $graph->add_edge( 'A', 'A1' ); $graph->add_edge( 'A', 'A2' ); $graph->add_edge( 'D', 'D1' ); $graph->add_edge( 'D', 'D2' ); is scalar graph_longest_paths( $graph ), 4; @paths = ChemOnomatopist::rule_greatest_number_of_side_chains( chains_even $graph, [ 'C', 'B', 'A', 'A1' ], [ 'B', 'C', 'D', 'D1' ] ), is scalar @paths, 2; @paths = ChemOnomatopist::rule_greatest_number_of_side_chains( chains_even $graph, [ 'C', 'B', 'A', 'A1' ], [ 'C', 'B', 'A', 'A2' ], [ 'B', 'C', 'D', 'D1' ] ); is scalar @paths, 4; # Elongated Y-shaped graph with an even-numbered longest path $graph = Graph::Undirected->new; $graph->add_path( 'AA1'..'AA5' ); $graph->add_path( 'AB1'..'AB5' ); $graph->add_path( 'BA1'..'BA5' ); $graph->add_edge( 'A', 'AA1' ); $graph->add_edge( 'A', 'AB1' ); $graph->add_edge( 'B', 'BA1' ); $graph->add_edge( 'A', 'B' ); @paths = ChemOnomatopist::rule_greatest_number_of_side_chains( chains_even $graph, [ 'B', 'A', 'AA1'..'AA5' ], [ 'B', 'A', 'AB1'..'AB5' ], [ 'A', 'B', 'BA1'..'BA5' ] ); is scalar @paths, 4; $graph->add_path( 'AA2', 'branch' ); @paths = ChemOnomatopist::rule_greatest_number_of_side_chains( chains_even $graph, [ 'B', 'A', 'AA1'..'AA5' ], [ 'B', 'A', 'AB1'..'AB5' ], [ 'A', 'B', 'BA1'..'BA5' ] ); is scalar @paths, 2; ChemOnomatopist-0.6.1/t/05_numbers.t000066400000000000000000000102741452012116100172330ustar00rootroot00000000000000#!/usr/bin/perl use strict; use warnings; use ChemOnomatopist; use Test::More; # Collected by MiglÄ— my @prefixes = qw( meth eth prop but pent hex hept oct non dec undec dodec tridec tetradec pentadec hexadec heptadec octadec nonadec icos henicos docos tricos tetracos pentacos hexacos heptacos octacos nonacos triacont hentriacont dotriacont tritriacont tetratriacont pentatriacont hexatriacont heptatriacont octatriacont nonatriacont tetracont hentetracont dotetracont tritetracont tetratetracont pentatetracont hexatetracont heptatetracont octatetracont nonatetracont pentacont henpentacont dopentacont tripentacont tetrapentacont pentapentacont hexapentacont heptapentacont octapentacont nonapentacont hexacont henhexacont dohexacont trihexacont tetrahexacont pentahexacont hexahexacont heptahexacont octahexacont nonahexacont heptacont henheptacont doheptacont triheptacont tetraheptacont pentaheptacont hexaheptacont heptaheptacont octaheptacont nonaheptacont octacont henoctacont dooctacont trioctacont tetraoctacont pentaoctacont hexaoctacont heptaoctacont octaoctacont nonaoctacont nonacont hennonacont dononacont trinonacont tetranonacont pentanonacont hexanonacont heptanonacont octanonacont nonanonacont hect henhect dohect trihect tetrahect pentahect hexahect heptahect octahect nonahect decahect undecahect dodecahect tridecahect tetradecahect pentadecahect hexadecahect heptadecahect octadecahect nonadecahect icosahect henicosahect docosahect tricosahect tetracosahect pentacosahect hexacosahect heptacosahect octacosahect nonacosahect triacontahect hentriacontahect dotriacontahect tritriacontahect tetratriacontahect pentatriacontahect hexatriacontahect heptatriacontahect octatriacontahect nonatriacontahect tetracontahect hentetracontahect dotetracontahect tritetracontahect tetratetracontahect pentatetracontahect hexatetracontahect heptatetracontahect octatetracontahect nonatetracontahect pentacontahect henpentacontahect dopentacontahect tripentacontahect tetrapentacontahect pentapentacontahect hexapentacontahect heptapentacontahect octapentacontahect nonapentacontahect hexacontahect henhexacontahect dohexacontahect trihexacontahect tetrahexacontahect pentahexacontahect hexahexacontahect heptahexacontahect octahexacontahect nonahexacontahect heptacontahect henheptacontahect doheptacontahect triheptacontahect tetraheptacontahect pentaheptacontahect hexaheptacontahect heptaheptacontahect octaheptacontahect nonaheptacontahect octacontahect henoctacontahect dooctacontahect trioctacontahect tetraoctacontahect pentaoctacontahect hexaoctacontahect heptaoctacontahect octaoctacontahect nonaoctacontahect nonacontahect hennonacontahect dononacontahect trinonacontahect tetranonacontahect pentanonacontahect hexanonacontahect heptanonacontahect octanonacontahect nonanonacontahect ); my %cases = ( # Taken from: # https://en.wikipedia.org/w/index.php?title=IUPAC_numerical_multiplier&oldid=1086173027 241 => 'hentetracontadict', 411 => 'undecatetract', 548 => 'octatetracontapentact', 9267 => 'heptahexacontadictanonali', # Taken from BBv2 P-14.2.1.2 363 => 'trihexacontatrict', 486 => 'hexaoctacontatetract', ); plan tests => scalar( @prefixes ) + scalar keys %cases; for (0..$#prefixes) { is( ChemOnomatopist::alkane_chain_name( $_ + 1 ), $prefixes[$_], "Number $_" ); } for (sort keys %cases) { is( ChemOnomatopist::alkane_chain_name( $_ ), $cases{$_}, "Number $_" ); } ChemOnomatopist-0.6.1/t/06_unsupported.t000066400000000000000000000014461452012116100201520ustar00rootroot00000000000000#!/usr/bin/perl use strict; use warnings; use ChemOnomatopist; use Test::More; my %unsupported = ( # 'N1NNN1' => 'cannot handle complicated monocycles for now', # FIXME: This is not supported 'C1=CC=C2C(=C1)C=CC3=C2N=CC=C3' => 'cannot handle complicated cyclic compounds', # PubChem 9191 'C1=CC=CC=2C1=C1C=C3C=C4C=CC=CC4=CC3=CC1=CC2' => 'cannot handle complicated cyclic compounds', # PubChem 67470 'C1=CC2=C3C=CC(=CC=C4C=CC(=C5C=CC(=CC=C1C=C2)C=C5)C=C4)C=C3' => 'cannot handle complicated cyclic compounds', # PubChem 157100544 ); plan tests => scalar keys %unsupported; for my $case (sort keys %unsupported) { my $message; eval { ChemOnomatopist::get_name( $case ) }; $message = $@ if $@; $message =~ s/\n$// if $message; is( $message, $unsupported{$case} ); } ChemOnomatopist-0.6.1/t/07_trees.t000066400000000000000000000006011452012116100166750ustar00rootroot00000000000000#!/usr/bin/perl use strict; use warnings; use ChemOnomatopist::Util::Graph qw( tree_number_of_branches ); use Graph::Undirected; use Test::More; plan tests => 3; my $graph = Graph::Undirected->new; $graph->add_path( 'A'..'E' ); is tree_number_of_branches( $graph, 'A'..'E' ), 0; is tree_number_of_branches( $graph, 'A'..'D' ), 1; is tree_number_of_branches( $graph, 'B'..'D' ), 2; ChemOnomatopist-0.6.1/t/08_locants.t000066400000000000000000000045111452012116100172230ustar00rootroot00000000000000#!/usr/bin/perl use strict; use warnings; use ChemOnomatopist; use ChemOnomatopist::Chain::FromHalves; use ChemOnomatopist::ChainHalf; use ChemOnomatopist::MolecularGraph; use Test::More; sub chains($@) { my( $graph, @halves ) = @_; @halves = map { ChemOnomatopist::ChainHalf->new( $graph, undef, @$_ ) } @halves; my @chains; for my $half1 (@halves) { for my $half2 (@halves) { next if $half1 == $half2; push @chains, ChemOnomatopist::Chain::FromHalves->new( $half1, $half2 ); } } return @chains; } plan tests => 4; my $graph; my @paths; my @atoms = map { { symbol => 'C', number => $_ } } 0..38; $graph = ChemOnomatopist::MolecularGraph->new; $graph->add_path( map { $atoms[$_] } ( 1, 0, 11..17 ) ); $graph->add_path( map { $atoms[$_] } ( 1, 22..27 ) ); $graph->add_path( map { $atoms[$_] } ( 1, 32..37 ) ); $graph->add_edge( map { $atoms[$_] } ( 11, 18 ) ); $graph->add_edge( map { $atoms[$_] } ( 25, 28 ) ); $graph->add_edge( map { $atoms[$_] } ( 32, 38 ) ); @paths = ChemOnomatopist::rule_lowest_numbered_locants( chains $graph, [ map { $atoms[$_] } ( 0, 11..17 ) ], [ map { $atoms[$_] } ( 0, 1, 22..27 ) ], [ map { $atoms[$_] } ( 0, 1, 32..37 ) ] ); is scalar( @paths ), 1; is join( ',', map { $_->{number} } $paths[0]->vertices ), '27,26,25,24,23,22,1,0,11,12,13,14,15,16,17'; my $chain = ChemOnomatopist::select_mainchain( $graph ); is join( ',', map { $_->{number} } $chain->vertices ), '27,26,25,24,23,22,1,0,11,12,13,14,15,16,17'; # Figure 7 from UrbonaitÄ—, 2022. # In the image, however, one branch is held as having priority over another, while in fact they are equal. $graph = Graph::Undirected->new; $graph->add_path( 'A'..'G' ); $graph->add_path( 'B', 'H' ); $graph->add_path( 'D', 'I'..'K' ); $graph->add_path( 'J', 'L' ); $graph->add_path( 'E', 'M' ); @paths = ChemOnomatopist::rule_lowest_numbered_locants( chains $graph, [ reverse 'A'..'D' ], [ 'D'..'G' ], [ 'D', 'I'..'K' ] ); is scalar( @paths ), 2; ChemOnomatopist-0.6.1/t/09_sidechain_carbons.t000066400000000000000000000025721452012116100212240ustar00rootroot00000000000000#!/usr/bin/perl use strict; use warnings; use Algorithm::Combinatorics qw(combinations); use ChemOnomatopist; use ChemOnomatopist::Chain::FromHalves; use ChemOnomatopist::ChainHalf; use Graph::Undirected; use Test::More; sub chain($@) { my( $graph, @vertices ) = @_; return ChemOnomatopist::ChainHalf->new( $graph, $vertices[1], @vertices ); } plan tests => 2; my $graph; my @chains; $graph = Graph::Undirected->new; $graph->add_path( 0, 11..15 ); $graph->add_path( 0, 21..25 ); $graph->add_path( 0, 31..35 ); $graph->add_path( 0, 41..45 ); $graph->add_path( 12, 16 ); $graph->add_path( 22, 26, 27 ); @chains = map { ChemOnomatopist::Chain::FromHalves->new( @$_ ) } grep { $_->[0] ne $_->[1] } combinations [ chain( $graph, 0, 11..15 ), chain( $graph, 0, 21..25 ), chain( $graph, 0, 31..35 ), chain( $graph, 0, 41..45 ) ], 2; @chains = ChemOnomatopist::rule_most_carbon_in_side_chains( @chains ); is scalar( @chains ), 6; @chains = map { ChemOnomatopist::Chain::FromHalves->new( @$_ ) } grep { $_->[0] ne $_->[1] } combinations [ chain( $graph, 0, 11..15 ), chain( $graph, 0, 31..35 ), chain( $graph, 0, 41..45 ) ], 2; @chains = ChemOnomatopist::rule_most_carbon_in_side_chains( @chains ); is scalar( @chains ), 3; ChemOnomatopist-0.6.1/t/10_locants.t000066400000000000000000000025071452012116100172170ustar00rootroot00000000000000#!/usr/bin/perl use strict; use warnings; use ChemOnomatopist; use ChemOnomatopist::Chain::FromHalves; use ChemOnomatopist::ChainHalf; use Graph::Undirected; use Test::More; sub chain { my( $graph, $other_center, @vertices ) = @_; return ChemOnomatopist::ChainHalf->new( $graph, $other_center, @vertices ); } plan tests => 4; my @atoms = map { { symbol => 'C', number => $_ } } 0..99; # A graph representing octane my $graph = Graph::Undirected->new( refvertexed => 1 ); $graph->add_path( map { $atoms[$_] } ( 1..8 ) ); my( $A, $B ); $A = chain( $graph, map { $atoms[$_] } ( 5, reverse 1..4 ) ); $B = chain( $graph, map { $atoms[$_] } ( 4, 5..8 ) ); is( ChemOnomatopist::Chain::FromHalves->new( $A, $B )->branch_positions, 0 ); is( ChemOnomatopist::Chain::FromHalves->new( $B, $A )->branch_positions, 0 ); # The following additions transform the graph to 2,4,7-trimethyloctane $graph->add_edge( map { $atoms[$_] } ( 2, 12 ) ); $graph->add_edge( map { $atoms[$_] } ( 4, 42 ) ); $graph->add_edge( map { $atoms[$_] } ( 7, 72 ) ); $A = chain( $graph, map { $atoms[$_] } ( 5, reverse 1..4 ) ); $B = chain( $graph, map { $atoms[$_] } ( 4, 5..8 ) ); is join( ',', ChemOnomatopist::Chain::FromHalves->new( $A, $B )->branch_positions ), '1,3,6'; is join( ',', ChemOnomatopist::Chain::FromHalves->new( $B, $A )->branch_positions ), '1,4,6'; ChemOnomatopist-0.6.1/t/11_ketones.t000066400000000000000000000015531452012116100172250ustar00rootroot00000000000000#!/usr/bin/perl use strict; use warnings; use ChemOnomatopist; use Test::More; my %SMILES_cases = ( # 'CCCC=O' => 'butan-1-one', # FIXME 'CC(=O)CC' => 'butan-2-one', 'CCCC(=O)CCCC(=O)CCCC(=O)CCCC(=O)C' => 'heptadecane-2,6,10,14-tetrone', 'CC(C)C(=O)CC(=O)C' => '5-methylhexane-2,4-dione', 'CC(C)CC(=O)C(CCC(=O)C)C(=O)CC(C)C' => '8-methyl-5-(3-methyl-1-oxobutyl)nonane-2,6-dione', # PubChem has '8-methyl-5-(3-methylbutanoyl)nonane-2,6-dione' # From BBv2 P-64.6.1 'CC(=S)CC' => 'butane-2-thione', 'CCC(CCC)=[Se]' => 'hexane-3-selone', 'CC(CC(C)=S)=S' => 'pentane-2,4-dithione', 'OC(C)C(C(C(C(C)O)=C)=O)=C' => '2,6-dihydroxy-3,5-dimethylideneheptan-4-one', # BBv2 P-64.7.1 ); plan tests => scalar( keys %SMILES_cases ); for my $case (sort keys %SMILES_cases) { is ChemOnomatopist::get_name( $case ), $SMILES_cases{$case}; } ChemOnomatopist-0.6.1/t/12_hydroxy.t000066400000000000000000000031011452012116100172530ustar00rootroot00000000000000#!/usr/bin/perl use strict; use warnings; use ChemOnomatopist; use Test::More; my @cases = ( { smiles => 'OCC(CO)(CO)CO', iupac => '2,2-bis(hydroxymethyl)propane-1,3-diol' }, { smiles => 'C1(CCCCC1)CCO', iupac => '2-cyclohexylethan-1-ol' }, # From BBv2 P-13.5.2 { smiles => 'C1(CCCCC1)CO', iupac => 'cyclohexylmethanol' }, # From BBv2 P-15.6.1.1 { smiles => 'CC(C)(C)O', iupac => '2-methylpropan-2-ol' }, # From BBv2 P-63.1.2 { smiles => 'ON1CC(CCC1)C#N', iupac => '1-hydroxypiperidine-3-carbonitrile' }, # From BBv2 P-63.1.4 # From BBv2 P-63.1.5 { smiles => 'CC(C)S', iupac => 'propane-2-thiol' }, { smiles => 'C(C)[SeH]', iupac => 'ethaneselenol' }, { smiles => 'SCCCCS', iupac => 'butane-1,4-dithiol' }, { smiles => 'SC1=CC=CC=C1', iupac => 'benzenethiol' }, { smiles => 'SCCC(=O)O', iupac => '3-sulfanylpropanoic acid' }, { smiles => 'SC1=C(C=CC=C1)O', iupac => '2-sulfanylphenol' }, { smiles => 'OC(CS)C1CCC(C(C1)O)S', iupac => '5-(1-hydroxy-2-sulfanylethyl)-2-sulfanylcyclohexan-1-ol', AUTHOR => 1 }, { smiles => 'SC(CC(=O)O)CS', iupac => '3,4-bis(sulfanyl)butanoic acid', AUTHOR => 1 }, ); @cases = grep { !exists $_->{AUTHOR} } @cases unless $ENV{AUTHOR_TESTING}; plan skip_all => 'No available cases' unless @cases; plan tests => scalar @cases; for my $case (@cases) { my $ok; eval { $ok = is ChemOnomatopist::get_name( $case->{smiles} ), $case->{iupac}, $case->{smiles} }; $@ =~ s/\n$// if $@; fail $case->{smiles} . ": $@" if $@; diag 'test supposed to fail with AUTHOR_TESTING' if $case->{AUTHOR} && $ok; } ChemOnomatopist-0.6.1/t/13_multigroup.t000066400000000000000000000030611452012116100177620ustar00rootroot00000000000000#!/usr/bin/perl use strict; use warnings; use ChemOnomatopist; use Test::More; my @cases = ( { smiles => 'CC(=O)CC(=O)CCCO', iupac => '7-hydroxyheptane-2,4-dione' }, { smiles => 'CC(C)C(CCC(C)O)CCC(=O)C', iupac => '8-hydroxy-5-propan-2-ylnonan-2-one', AUTHOR => 1 }, # PubChem 537619 { smiles => 'C(O)OO', iupac => 'hydroperoxymethanol' }, { smiles => 'OOCC(=O)C', iupac => '1-hydroperoxypropan-2-one' }, # Unchecked # From BBv2 P-65.1.2.4 { smiles => 'CC(=O)CCCC(=O)O', iupac => '5-oxohexanoic acid' }, { smiles => 'C(=O)(O)C(O)C(C(=O)O)C(=O)C(=O)O', iupac => '1-hydroxy-3-oxopropane-1,2,3-tricarboxylic acid' }, { smiles => 'O=CCCC(=O)O', iupac => '4-oxobutanoic acid', AUTHOR => 1 }, # BBv2 P-66.6.1.3 - FIXME { smiles => 'CCCCCCCCC(C=O)C(CC)O', iupac => '2-(1-hydroxypropyl)decanal' }, { smiles => 'S=C(CC(=O)O)C', iupac => '3-sulfanylidenebutanoic acid' }, { smiles => '[N+](=O)([O-])C', iupac => 'nitromethane' }, # BBv2 P-61.5.1 { smiles => 'I(=O)(=O)(=O)CCC', iupac => 'periodylpropane', AUTHOR => 1 }, { smiles => 'I(=O)(=O)(=O)C(C)C', iupac => '2-periodylpropane' }, ); @cases = grep { !exists $_->{AUTHOR} } @cases unless $ENV{AUTHOR_TESTING}; plan skip_all => 'No available cases' unless @cases; plan tests => scalar @cases; for my $case (@cases) { my $ok; eval { $ok = is ChemOnomatopist::get_name( $case->{smiles} ), $case->{iupac}, $case->{smiles} }; $@ =~ s/\n$// if $@; fail $case->{smiles} . ": $@" if $@; diag 'test supposed to fail with AUTHOR_TESTING' if $case->{AUTHOR} && $ok; } ChemOnomatopist-0.6.1/t/14_carboxylic_acids.t000066400000000000000000000021551452012116100210610ustar00rootroot00000000000000#!/usr/bin/perl use strict; use warnings; use ChemOnomatopist; use Test::More; my @cases = ( # From BBv2 P-65.1.2 { smiles => 'CCCC(=O)O', iupac => 'butanoic acid' }, { smiles => 'OC(=O)CCCCCCCCCCC(=O)O', iupac => 'dodecanedioic acid' }, # From BBv2 P-65.1.2.2.1 { smiles => 'OC(=O)CCC(C(=O)O)CCC(=O)O', iupac => 'pentane-1,3,5-tricarboxylic acid' }, { smiles => 'C(C(=O)O)(C(=O)O)C(C(=O)O)(C(=O)O)', iupac => 'ethane-1,1,2,2-tetracarboxylic acid' }, { smiles => 'C(=O)O', iupac => 'formic acid', AUTHOR => 1 }, { smiles => 'C1=CC2=C(C=C1C(=O)O)SC(=N2)C(F)F', iupac => '2-(difluoromethyl)-1,3-benzothiazole-6-carboxylic acid' }, # PubChem 84692459 ); @cases = grep { !exists $_->{AUTHOR} } @cases unless $ENV{AUTHOR_TESTING}; plan skip_all => 'No available cases' unless @cases; plan tests => scalar @cases; for my $case (@cases) { my $ok; eval { $ok = is ChemOnomatopist::get_name( $case->{smiles} ), $case->{iupac}, $case->{smiles} }; $@ =~ s/\n$// if $@; fail $case->{smiles} . ": $@" if $@; diag 'test supposed to fail with AUTHOR_TESTING' if $case->{AUTHOR} && $ok; } ChemOnomatopist-0.6.1/t/15_aldehydes.t000066400000000000000000000016211452012116100175170ustar00rootroot00000000000000#!/usr/bin/perl use strict; use warnings; use ChemOnomatopist; use Test::More; my @cases = ( # From BBv2 P-66.6.1.1.1 { smiles => 'CCCCC=O', iupac => 'pentanal' }, { smiles => 'O=CCCCC=O', iupac => 'pentanedial' }, { smiles => 'O=CCC(C=O)CCC=O', iupac => 'butane-1,2,4-tricarbaldehyde' }, # From BBv2 P-66.6.1.1.2 { smiles => 'CN(N=NN(C)C)C=O', iupac => '1,4,4-trimethyltetraaz-2-ene-1-carbaldehyde', AUTHOR => 1 }, # From BBv2 P-66.6.1.1.3 ); @cases = grep { !exists $_->{AUTHOR} } @cases unless $ENV{AUTHOR_TESTING}; plan skip_all => 'No available cases' unless @cases; plan tests => scalar @cases; for my $case (@cases) { my $ok; eval { $ok = is ChemOnomatopist::get_name( $case->{smiles} ), $case->{iupac}, $case->{smiles} }; $@ =~ s/\n$// if $@; fail $case->{smiles} . ": $@" if $@; diag 'test supposed to fail with AUTHOR_TESTING' if $case->{AUTHOR} && $ok; } ChemOnomatopist-0.6.1/t/16_heteroatoms.t000066400000000000000000000045301452012116100201120ustar00rootroot00000000000000#!/usr/bin/perl use strict; use warnings; use ChemOnomatopist; use Test::More; my @cases = ( { smiles => 'COCCOCCOCCOCC', iupac => '2,5,8,11-tetraoxatridecane' }, # BBv2 P-12.1 { smiles => 'C(F)(F)(F)C(F)(F)CO', iupac => '2,2,3,3,3-pentafluoropropan-1-ol' }, # BBv2 P-14.3.4.5 { smiles => 'ClC(C(Cl)(Cl)Cl)(Cl)Cl', iupac => 'hexachloroethane', AUTHOR => 1 }, { smiles => 'ClC(=C(Cl)Cl)Cl', iupac => 'tetrachloroethene', AUTHOR => 1 }, { smiles => 'C1CCCCC1CCOCC', iupac => '(2-ethoxyethyl)cyclohexane' }, { smiles => 'C1CCCCC1OCCCC', iupac => 'butoxycyclohexane' }, # PubChem 13299482 # From BBv2 P-15.4.3.1 { smiles => 'COCSSCCOCC[Se]C', iupac => '2,8-dioxa-4,5-dithia-11-selenadodecane' }, { smiles => '[Si]OCS[Si]', iupac => '2-oxa-4-thia-1,5-disilapentane' }, # From BBv2 P-15.4.3.2.1 { smiles => 'C[Si]C[Si]C[Si]CSCC', iupac => '8-thia-2,4,6-trisiladecane' }, { smiles => 'C[Si]C[Si]C[Si]COC', iupac => '2-oxa-4,6,8-trisilanonane', AUTHOR => 1 }, # From BBv2 P-15.4.3.2.3 { smiles => 'C[Si]C[Si]C[Si]C[Si]C(=O)O', iupac => '2,4,6,8-tetrasilanonan-1-oic acid' }, { smiles => 'C[Si]C[Si]C[Si]C[Si]CCO', iupac => '2,4,6,8-tetrasiladecan-10-ol' }, { smiles => 'C[SiH2]C[SiH2]C[SiH2]C[SiH2]C=C', iupac => '2,4,6,8-tetrasiladec-9-ene' }, # BBv2 P-15.4.3.2.4 { smiles => 'BrC(CCCC(CCl)CBr)Cl', iupac => '1-bromo-5-(bromomethyl)-1,6-dichlorohexane', AUTHOR => 1 }, # BBv2 P-45.2.3 # From BBv2 P-61.5.2 { smiles => 'ON(O)OC(C)(C)C(C(O)=O)NC(C)(C)C', iupac => '2-(tert-butylimino)-3-methyl-3-(nitrooxy)butanoic acid', AUTHOR => 1 }, { smiles => 'FC(C(CC)F)C(CC(CCCC)CC)CCCCCC', iupac => '7-(1,2-difluorobutyl)-5-ethyltridecane' }, # BBv2 P-14.5.2 # From BBv2 P-31.1.2.2.1 { smiles => '[SiH3][SiH]=[SiH][SiH2][SiH2][SiH3]', iupac => 'hexasil-2-ene', AUTHOR => 1 }, # FIXME: Need to elide vowels { smiles => '[SiH]#[SiH]', iupac => 'disilyne' }, ); @cases = grep { !exists $_->{AUTHOR} } @cases unless $ENV{AUTHOR_TESTING}; plan skip_all => 'No available cases' unless @cases; plan tests => scalar @cases; for my $case (@cases) { my $ok; eval { $ok = is ChemOnomatopist::get_name( $case->{smiles} ), $case->{iupac}, $case->{smiles} }; $@ =~ s/\n$// if $@; fail $case->{smiles} . ": $@" if $@; diag 'test supposed to fail with AUTHOR_TESTING' if $case->{AUTHOR} && $ok; } ChemOnomatopist-0.6.1/t/17_long.t000066400000000000000000000016511452012116100165210ustar00rootroot00000000000000#!/usr/bin/perl use strict; use warnings; use ChemOnomatopist; use Test::More; if( !$ENV{EXTENDED_TESTING} ) { plan skip_all => "Skip \$ENV{EXTENDED_TESTING} is not set\n"; } my %SMILES_cases = ( 'CCCCCC(CC(CC)CC)CC(CC(CC(CC)CC)CC(CC)CC)CC(CC(CC(CC)CC)CC(CC)CC)CC(CC(CC(CC(CC(CC)CC)CC(CC)CC)CC(CC(CC)CC)CC(CC)CC)CC(CC(CC(CC)CC)CC(CC)CC)CC(CC(CC)CC)CC(CC)CC)CC(CC(CC(CC(CC)CC)CC(CC)CC)CC(CC(CC)CC)CC(CC)CC)CC(CC(CC(CC)CC)CC(CC)CC)CC(CC(CC)CC)CC(CC)CC' => '3-ethyl-5,17-bis(2-ethylbutyl)-11-[8-ethyl-6-(2-ethylbutyl)-2-[6-ethyl-4-(2-ethylbutyl)-2-[4-ethyl-2-(2-ethylbutyl)hexyl]octyl]-4-[4-ethyl-2-(2-ethylbutyl)hexyl]decyl]-9-[6-ethyl-4-(2-ethylbutyl)-2-[4-ethyl-2-(2-ethylbutyl)hexyl]octyl]-7,13,15-tris[4-ethyl-2-(2-ethylbutyl)hexyl]docosane', ); plan tests => scalar( keys %SMILES_cases ); for my $case (sort keys %SMILES_cases) { my $name = $SMILES_cases{$case}; is ChemOnomatopist::get_name( $case ), $name; } ChemOnomatopist-0.6.1/t/18_hydroperoxides.t000066400000000000000000000024601452012116100206320ustar00rootroot00000000000000#!/usr/bin/perl use strict; use warnings; use ChemOnomatopist; use Test::More; my @cases = ( { smiles => 'CCOO', iupac => 'ethaneperoxol' }, # From BBv2 P-56.2 { smiles => 'CSO', iupac => 'methane-SO-thioperoxol' }, { smiles => 'C1(=CC=CC=C1)[Se][SeH]', iupac => 'benzenediselenoperoxol' }, # From BBv2 P-63.4.2.1 { smiles => 'CCCOS', iupac => 'propane-1-OS-thioperoxol' }, { smiles => 'CCSS', iupac => 'ethanedithioperoxol' }, { smiles => 'CS[Se][H]', iupac => 'methane-SSe-selenothioperoxol' }, # From BBv2 P-63.4.2.2 { smiles => 'OOCCO', iupac => '2-hydroperoxyethan-1-ol' }, { smiles => 'SSCC(=O)O', iupac => 'disulfanylacetic acid' }, # From BBv2 P-63.7 { smiles => '[SeH]OCCOO', iupac => '2-(selanyloxy)ethane-1-peroxol', AUTHOR => 1 }, { smiles => 'NCC(C)(OO)C', iupac => '1-amino-2-methylpropane-2-peroxol' }, ); @cases = grep { !exists $_->{AUTHOR} } @cases unless $ENV{AUTHOR_TESTING}; plan skip_all => 'No available cases' unless @cases; plan tests => scalar @cases; for my $case (@cases) { my $ok; eval { $ok = is ChemOnomatopist::get_name( $case->{smiles} ), $case->{iupac}, $case->{smiles} }; $@ =~ s/\n$// if $@; fail $case->{smiles} . ": $@" if $@; diag 'test supposed to fail with AUTHOR_TESTING' if $case->{AUTHOR} && $ok; } ChemOnomatopist-0.6.1/t/19_amines.t000066400000000000000000000100041452012116100170300ustar00rootroot00000000000000#!/usr/bin/perl use strict; use warnings; use ChemOnomatopist; use Test::More; my @cases = ( { smiles => 'CC(CN)O', iupac => '1-aminopropan-2-ol' }, { smiles => 'C(CC(=O)N)C(=O)C(=O)O', iupac => '5-amino-2,5-dioxopentanoic acid', AUTHOR => 1 }, # PubChem 48: seems a bit strange { smiles => 'CC(CC(C(=O)O)N)N', iupac => '2,4-diaminopentanoic acid' }, { smiles => 'C(CN)C=O', iupac => '3-aminopropanal' }, { smiles => 'C(CCN)CCN', iupac => 'pentane-1,5-diamine', AUTHOR => 1 }, { smiles => 'CC(CC1=CC=C(NCCC(C)C)C=C1)CC', iupac => '4-(2-methylbutyl)-N-(3-methylbutyl)aniline' }, # BBv2 P-14.5.4 { smiles => 'NC1=CC=CC=C1', iupac => 'aniline' }, # BBv2 P-34.1.1.5 { smiles => 'COC1=CC=C(NC2=CC=CC=C2)C=C1', iupac => '4-methoxy-N-phenylaniline' }, # BBv2 P-45.2.1 { smiles => 'NC1=C(OC2=C(NC)C=CC=C2)C=CC(=C1)C', iupac => '2-(2-amino-4-methylphenoxy)-N-methylaniline', AUTHOR => 1 }, # BBv2 P-45.2.2 { smiles => 'BrC1=C(NC2=C(C=C(C=C2)Br)Cl)C=CC(=C1)Cl', iupac => '2-bromo-N-(4-bromo-2-chlorophenyl)-4-chloroaniline' }, # BBv2 P-45.2.3 { smiles => 'BrC1=C(NC2=C(C=C(C=C2)Br)Br)C=CC(=C1)Cl', iupac => '2-bromo-4-chloro-N-(2,4-dibromophenyl)aniline', AUTHOR => 1 }, # BBv2 P-45.5 { smiles => 'CNC', iupac => 'N-methylmethanamine' }, # BBv2 P-52.1.3 # From BBv2 P-62.2.1.1.1 { smiles => 'CNC1=CC=CC=C1', iupac => 'N-methylaniline' }, { smiles => 'ClC1=CC=C(N)C=C1', iupac => '4-chloroaniline' }, { smiles => 'CC1=CC=C(N)C=C1', iupac => '4-methylaniline' }, # BBv2 P-62.2.1.1.2 # From BBv2 P-62.2.1.2 { smiles => 'CN', iupac => 'methanamine' }, { smiles => 'CC(CN)C', iupac => '2-methylpropan-1-amine' }, # From BBv2 P-62.2.2.1 { smiles => 'C(C)N(CC)CC', iupac => 'N,N-diethylethanamine' }, { smiles => 'ClCCNCCC', iupac => 'N-(2-chloroethyl)propan-1-amine', AUTHOR => 1 }, # FIXME: This is strange { smiles => 'C(C)N(CCCC)CCC', iupac => 'N-ethyl-N-propylbutan-1-amine' }, { smiles => 'C1(=CC=CC=C1)NC=1C=NC=CC1', iupac => 'N-phenylpyridin-3-amine' }, { smiles => 'C1(=CC=CC=C1)NC1=CC=CC=C1', iupac => 'N-phenylaniline' }, { smiles => 'C1(CCCCC1)NC1=CC=CC=C1', iupac => 'N-cyclohexylaniline' }, # From BBv2 P-62.2.2.2 { smiles => 'CC(C#CCN(CCC)CCC)=C', iupac => '4-methyl-N,N-dipropylpent-4-en-2-yn-1-amine' }, { smiles => 'CN(C(C)C=CC1CC=C(CC1)C)C', iupac => 'N,N-dimethyl-4-(4-methylcyclohex-3-en-1-yl)but-3-en-2-amine', AUTHOR => 1 }, { smiles => 'CN(C(C#C)CC)C', iupac => 'N,N-dimethylpent-1-yn-3-amine', AUTHOR => 1 }, { smiles => 'C(=C)NCCCC', iupac => 'N-ethenylbutan-1-amine' }, { smiles => 'CC(CN(CC(=C)C)CC(=C)C)(C)C', iupac => 'N-(2,2-dimethylpropyl)-2-methyl-N-(2-methylprop-2-en-1-yl)prop-2-en-1-amine', AUTHOR => 1 }, { smiles => 'C1(CCCCC1)NC1=CC=CC=C1', iupac => 'N-cyclohexylaniline' }, { smiles => 'O1C(=CC=C1)NC=1NC=CC1', iupac => 'N-(furan-2-yl)-1H-pyrrol-2-amine', AUTHOR => 1 }, { smiles => 'C(CCC)NC1CC1', iupac => 'N-butylcyclopropanamine' }, { smiles => 'C1=C(C=CC=2CCCCC12)NC1=CC2=CC=CC=C2C=C1', iupac => 'N-(5,6,7,8-tetrahydronaphthalen-2-yl)naphthalen-2-amine', AUTHOR => 1 }, { smiles => 'NCCC(=O)O', iupac => '3-aminopropanoic acid' }, # From BBv2 P-62.2.3 { smiles => 'NCCO', iupac => '2-aminoethan-1-ol' }, # From BBv2 P-63.7 { smiles => 'CC(C)C(C)N1CCCCC(C1=O)NC', iupac => '3-(methylamino)-1-(3-methylbutan-2-yl)azepan-2-one' }, # PubChem 58916315 # FIXME: Misses methylamino { smiles => 'C1CNCCC1CNCCO', iupac => '2-(piperidin-4-ylmethylamino)ethanol', AUTHOR => 1 }, # PubChem 14950460 # CHECKME: Most likely incorrect IUPAC name, should be -ethan-1-ol ); @cases = grep { !exists $_->{AUTHOR} } @cases unless $ENV{AUTHOR_TESTING}; plan skip_all => 'No available cases' unless @cases; plan tests => scalar @cases; for my $case (@cases) { my $ok; eval { $ok = is ChemOnomatopist::get_name( $case->{smiles} ), $case->{iupac}, $case->{smiles} }; $@ =~ s/\n$// if $@; fail $case->{smiles} . ": $@" if $@; diag 'test supposed to fail with AUTHOR_TESTING' if $case->{AUTHOR} && $ok; } ChemOnomatopist-0.6.1/t/20_imines.t000066400000000000000000000036161452012116100170430ustar00rootroot00000000000000#!/usr/bin/perl use strict; use warnings; use ChemOnomatopist; use Test::More; my @cases = ( # From BBv2 P-62.3.1.1 { smiles => 'C(CCCCC)=N', iupac => 'hexan-1-imine' }, { smiles => 'CN=CC', iupac => 'N-methylethanimine' }, { smiles => 'ClC1=CC=C(C=C1)N=CC1=CC=C(C=C1)Cl', iupac => 'N,1-bis(4-chlorophenyl)methanimine' }, { smiles => 'ClC1=CC=C(C=C1)C=NC1=CC=C(N)C=C1', iupac => '4-{[(4-chlorophenyl)methylidene]amino}aniline', AUTHOR => 1 }, { smiles => 'S1C(CCC1)=N', iupac => 'thiolan-2-imine' }, { smiles => 'C1C(C=CC2=CC=CC=C12)=N', iupac => 'naphthalen-2(1H)-imine', AUTHOR => 1 }, # From BBv2 P-62.3.1.2 { smiles => 'N=C1CC(CC(C1)=N)=O', iupac => '3,5-diiminocyclohexan-1-one' }, { smiles => 'N=C(CC1CC(CCC1)C(=O)O)C', iupac => '3-(2-iminopropyl)cyclohexane-1-carboxylic acid' }, { smiles => 'N=C1CCC(N1)=O', iupac => '5-iminopyrrolidin-2-one' }, { smiles => 'N=C1C=CC(C=C1)=O', iupac => '4-iminocyclohexa-2,5-dien-1-one' }, # From BBv2 P-62.3.1.3 { smiles => 'CP=N', iupac => '1-methylphosphanimine', AUTHOR => 1 }, { smiles => 'C[Si](=NC1=CC=CC=C1)C', iupac => '1,1-dimethyl-N-phenylsilanimine', AUTHOR => 1 }, { smiles => 'CN=[SiH]CC(=O)OC', iupac => 'methyl [(methylimino)silyl]acetate', AUTHOR => 1 }, { smiles => 'C(C(=N)C(=O)O)C(=O)O', iupac => '2-iminobutanedioic acid' }, { smiles => 'CC(C)C=N', iupac => '2-methylpropan-1-imine' }, { smiles => 'CCCN=CC', iupac => 'N-propylethanimine' }, ); @cases = grep { !exists $_->{AUTHOR} } @cases unless $ENV{AUTHOR_TESTING}; plan skip_all => 'No available cases' unless @cases; plan tests => scalar @cases; for my $case (@cases) { my $ok; eval { $ok = is ChemOnomatopist::get_name( $case->{smiles} ), $case->{iupac}, $case->{smiles} }; $@ =~ s/\n$// if $@; fail $case->{smiles} . ": $@" if $@; diag 'test supposed to fail with AUTHOR_TESTING' if $case->{AUTHOR} && $ok; } ChemOnomatopist-0.6.1/t/21_monocycles.t000066400000000000000000000162471452012116100177370ustar00rootroot00000000000000#!/usr/bin/perl use strict; use warnings; use ChemOnomatopist; use Test::More; my @cases = ( { smiles => 'C1CCNC1', iupac => 'pyrrolidine' }, { smiles => 'C1=CC=CC=C1', iupac => 'benzene' }, { smiles => 'C=1C=CC=CC=1', iupac => 'benzene' }, { smiles => 'c1ccccc1', iupac => 'benzene' }, { smiles => 'FC1=CC=CC=C1', iupac => 'fluorobenzene' }, { smiles => 'FC=1C(=C(C(=C(C1)F)F)F)F', iupac => 'pentafluorobenzene' }, { smiles => 'FC1=C(C(=C(C(=C1F)F)F)F)F', iupac => 'hexafluorobenzene' }, { smiles => 'C1(C=CCCC1)O', iupac => 'cyclohex-2-en-1-ol' }, # From BBv2 P-14.3.2 { smiles => 'N1=C(C=NC=C1)C(=O)O', iupac => 'pyrazinecarboxylic acid', AUTHOR => 1 }, # From BBv2 P-14.3.4.3 { smiles => 'C1(C=CCCC1)N', iupac => 'cyclohex-2-en-1-amine' }, # From BBv2 P-14.4 { smiles => 'C1(CCCC1)=CC(=O)O', iupac => 'cyclopentylideneacetic acid' }, # From BBv2 P-15.6.2 # From BBv2 P-22.1.3 { smiles => 'CC1=CC=CC=C1', iupac => 'toluene' }, { smiles => 'C=1(C(=CC=CC1)C)C', iupac => '1,2-xylene' }, { smiles => 'C1(=CC(=CC=C1)C)C', iupac => '1,3-xylene' }, { smiles => 'C1(=CC=C(C=C1)C)C', iupac => '1,4-xylene' }, { smiles => 'CC1=CC(=CC(=C1)C)C', iupac => '1,3,5-trimethylbenzene' }, { smiles => 'N(=O)C1=CC=CC=C1', iupac => 'nitrosobenzene' }, # From BBv2 P-59.1.9 { smiles => 'Br(=O)C1=CC=CC=C1', iupac => 'bromosylbenzene' }, # From BBv2 P-67.1.4.5 { smiles => 'N1C=NC=C1', iupac => '1H-imidazole' }, # From BBv2 P-22.2.1 # From BBv2 P-61.5.1 { smiles => 'CC1=C(C=C(C=C1[N+](=O)[O-])[N+](=O)[O-])[N+](=O)[O-]', iupac => '2-methyl-1,3,5-trinitrobenzene' }, # From BBv2 P-22.2.2.1.1 { smiles => 'S1C=CC=CC=C1', iupac => 'thiepine' }, { smiles => 'O1CCCCCCC1', iupac => 'oxocane' }, # From BBv2 P-22.2.2.1.5.2 { smiles => 'O1CCC1', iupac => 'oxetane' }, { smiles => 'N1CCC1', iupac => 'azetidine' }, { smiles => 'C1NNN1', iupac => 'triazetidine' }, # PubChem 23033474 # From BBv2 P-22.2.2.1.2 { smiles => 'N1=CC=CN=CC=C1', iupac => '1,5-diazocine' }, { smiles => 'O1COCC1', iupac => '1,3-dioxolane' }, # From BBv2 P-22.2.2.1.3 { smiles => 'S1C=NC=C1', iupac => '1,3-thiazole' }, { smiles => 'O1SCCC1', iupac => '1,2-oxathiolane' }, { smiles => 'O1SCCCSC1', iupac => '1,2,6-oxadithiepane' }, { smiles => 'O1N=CC=P1', iupac => '1,2,5-oxazaphosphole' }, { smiles => 'O1CCCCCCOCCCCCCCCCC1', iupac => '1,8-dioxacyclooctadecane' }, # From BBv2 P-22.2.4 # From BBv2 P-25.2.2.1.2 { smiles => 'O1C=CC=CC=COC=CC=CC=CC=CC=C1', iupac => '1,8-dioxacyclooctadeca-2,4,6,9,11,13,15,17-octaene' }, { smiles => 'O1CC=NC=CC=NC=CN=CC=C1', iupac => '1-oxa-4,8,11-triazacyclotetradeca-3,5,7,9,11,13-hexaene' }, # From BBv2 P-63.1.1.1 { smiles => 'C1=CC=CC=C1O', iupac => 'phenol' }, { smiles => 'BrC1=C(C=CC=C1)O', iupac => '2-bromophenol' }, { smiles => 'C(=O)(O)C1=CC=CC=C1', iupac => 'benzoic acid' }, { smiles => 'N1C(CCCCC1)=S', iupac => 'azepane-2-thione' }, # From BBv2 P-64.6.1 # From BBv2 P-31.1.3.1 { smiles => 'C1C=CCCC1', iupac => 'cyclohexene' }, { smiles => 'C1C=CCC=C1', iupac => 'cyclohexa-1,4-diene' }, # From BBv2 P-31.1.3.2 { smiles => 'O1C=COCCOCCOCC1', iupac => '1,4,7,10-tetraoxacyclododec-2-ene' }, { smiles => 'O1CC=NCCCCCCCC1', iupac => '1-oxa-4-azacyclododec-3-ene' }, { smiles => '[SiH2]1CC#CC=CC=CCC[SiH2]CCCCCCCCC1', iupac => '1,11-disilacycloicosa-5,7-dien-3-yne' }, { smiles => '[SiH2]1CCCCCCCC[SiH2]CC=CC=CC=CC#CC1', iupac => '1,10-disilacycloicosa-12,14,16-trien-18-yne', AUTHOR => 1 }, # flaky # From BBv2 P-31.1.3.4 { smiles => 'C=CC1=CC=CC=C1', iupac => 'ethenylbenzene' }, { smiles => 'C=C1C=CC=C1', iupac => '5-methylidenecyclopenta-1,3-diene', AUTHOR => 1 }, { smiles => 'S1CCNCCC1', iupac => '1,4-thiazepane' }, # From BBv2 P-31.2.3.2 # From BBv2 P-14.5.1 { smiles => 'C1CCCCC1(C)CC', iupac => '1-ethyl-1-methylcyclohexane' }, { smiles => 'CCC1CCC(C)CC1', iupac => '1-ethyl-4-methylcyclohexane' }, { smiles => 'CC1=NC(=CC=C1)C', iupac => '2,6-dimethylpyridine' }, { smiles => 'C1CCCCC1(C(C)(C)C)(CCCC)', iupac => '1-butyl-1-tert-butylcyclohexane' }, # Simplified version of example from BBv2 P-14.5.1 { smiles => 'C(=O)(O)CC([Br])([Br])C1CCCCC1', iupac => '3,3-dibromo-3-cyclohexylpropanoic acid' }, { smiles => 'ClC=1C=CC=CC=1C(F)(F)C(F)(F)F', iupac => '1-chloro-2-(pentafluoroethyl)benzene', AUTHOR => 1 }, # From BBv2 P-14.3.4.5 { smiles => 'CC(CCC)C1=CC=C(C=C1)C(CC)CC', iupac => '1-(pentan-2-yl)-4-(pentan-3-yl)benzene', AUTHOR => 1 }, # From BBv2 P-14.5.4 # flaky # From BBv2 P-14.5.3 { smiles => 'C(C)(C)(C)C=1C=CC=C(C(C)CC)C=1', iupac => '1-(butan-2-yl)-3-tert-butylbenzene' }, { smiles => 'O=C1NC(=O)NC=C1C', iupac => '5-methylpyrimidine-2,4(1H,3H)-dione', AUTHOR => 1 }, # thymine { smiles => 'c1cc(oc1)C=O', iupac => 'furan-2-carbaldehyde', AUTHOR => 1 }, # furfural { smiles => 'O(c1cc(cc(OC)c1OC)CCN)C', iupac => '2-(3,4,5-trimethoxyphenyl)ethanamine' }, # mescaline, SMILES from Wikipedia { smiles => 'COC=1C=C(C=C(C1OC)OC)CCN', iupac => '2-(3,4,5-trimethoxyphenyl)ethanamine' }, # mescaline { smiles => 'C1CCCCC1C=O', iupac => 'cyclohexanecarbaldehyde', AUTHOR => 1 }, # From BBv2 P-66.6.1.1.3 { smiles => 'S=C(CC1CC(CCC1)CC(CC)=O)CC', iupac => '1-[3-(2-sulfanylidenebutyl)cyclohexyl]butan-2-one' }, # From BBv2 P-64.7.3 { smiles => 'C1=CC(=CC=C1F)Cl(=O)(=O)=O', iupac => '1-fluoro-4-perchlorylbenzene' }, # PubChem 24972904 { smiles => 'CCC1CN1Cl(=O)(=O)=O', iupac => '2-ethyl-1-perchlorylaziridine', AUTHOR => 1 }, # PubChem 24973518 { smiles => 'CC(=CC(=C(C)C)C(=C)C1=CC=CC=C1)C', iupac => '(5-methyl-3-propan-2-ylidenehexa-1,4-dien-2-yl)benzene' }, # PubChem 141889885 # From BBv2 P-59.2.1.6 { smiles => 'OC1CCC(CC1)C(CCCCCO)O', iupac => '1-(4-hydroxycyclohexyl)hexane-1,6-diol', AUTHOR => 1 }, { smiles => 'O=C(CC1CC(C(C1)=O)=O)CC', iupac => '4-(2-oxobutyl)cyclopentane-1,2-dione' }, { smiles => 'O=CCCCCCCC1CC(CCC1)C=O', iupac => '3-(7-oxoheptyl)cyclohexane-1-carbaldehyde', AUTHOR => 1 }, # From BBv2 P-59.2.1.7 { smiles => 'S1C(=NC=C1)CC(=O)O', iupac => '(1,3-thiazol-2-yl)acetic acid' }, # From BBv2 P-15.6.1.2 { smiles => 'S1C(=NC=C1)CCCCC(=O)O', iupac => '5-(1,3-thiazol-2-yl)pentanoic acid' }, # Synthetic, based on BBv2 P-15.6.1.2 { smiles => 'CC1=C(C=C(C=C1)C(C)C)O', iupac => '2-methyl-5-(propan-2-yl)phenol' }, # From BBv2 P-63.1.1.2 { smiles => 'S=C1OCCC1C#N', iupac => '2-sulfanylideneoxolane-3-carbonitrile' }, # From BBv2 P-65.6.3.5.1 # From BBv2 P-64.7.1 { smiles => 'NC1C(NCCCC1)=O', iupac => '3-aminoazepan-2-one' }, { smiles => 'ClC=1C(C(=C(C(C1O)=O)Cl)O)=O', iupac => '2,5-dichloro-3,6-dihydroxycyclohexa-2,5-diene-1,4-dione' }, ); @cases = grep { !exists $_->{AUTHOR} } @cases unless $ENV{AUTHOR_TESTING}; plan skip_all => 'No available cases' unless @cases; plan tests => scalar @cases; for my $case (@cases) { my $ok; eval { $ok = is ChemOnomatopist::get_name( $case->{smiles} ), $case->{iupac}, $case->{smiles} }; $@ =~ s/\n$// if $@; fail $case->{smiles} . ": $@" if $@; diag 'test supposed to fail with AUTHOR_TESTING' if $case->{AUTHOR} && $ok; } ChemOnomatopist-0.6.1/t/22_comparisons.t000066400000000000000000000022721452012116100201130ustar00rootroot00000000000000#!/usr/bin/perl use strict; use warnings; use ChemOnomatopist; use Test::More; plan tests => 7; my @sorted; @sorted = sort { ChemOnomatopist::cmp_attachments( $a, $b ) } ( [ 'butyl' ], [ 'tert-butyl' ] ); is join( ';', map { join ',', @$_ } @sorted ), 'butyl;tert-butyl'; @sorted = sort { ChemOnomatopist::cmp_attachments( $a, $b ) } ( [ 'tricosyl' ], [ 'tert-butyl' ] ); is join( ';', map { join ',', @$_ } @sorted ), 'tricosyl;tert-butyl'; @sorted = sort { ChemOnomatopist::cmp_attachments( $a, $b ) } ( [ 'ethyl' ], [ 'methyl' ] ); is join( ';', map { join ',', @$_ } @sorted ), 'ethyl;methyl'; @sorted = sort { ChemOnomatopist::cmp_heteroatom_seniority( $a, $b ) } ( [ 'N' ], [ 'S' ] ); is join( ';', map { join ',', @$_ } @sorted ), 'S;N'; @sorted = sort { ChemOnomatopist::cmp_heteroatom_seniority( $a, $b ) } ( [ 'O' ], [ 'Si' ] ); is join( ';', map { join ',', @$_ } @sorted ), 'O;Si'; @sorted = sort { ChemOnomatopist::cmp_heteroatom_seniority( $a, $b ) } ( [ 'N' ], [ 'O' ] ); is join( ';', map { join ',', @$_ } @sorted ), 'O;N'; @sorted = sort { ChemOnomatopist::cmp_heteroatom_seniority( $a, $b ) } ( [ 'N' ], [ 'O' ], [ 'P' ] ); is join( ';', map { join ',', @$_ } @sorted ), 'O;N;P'; ChemOnomatopist-0.6.1/t/23_esters.t000066400000000000000000000013741452012116100170660ustar00rootroot00000000000000#!/usr/bin/perl use strict; use warnings; use ChemOnomatopist; use Test::More; my @cases = ( { smiles => 'CCCCCCCC(=O)OC(C)(C)C', iupac => 'tert-butyl octanoate', AUTHOR => 1 }, # BBv2 P-65.6.3.3.1 { smiles => 'CCOC(=O)CC(=O)OC', iupac => 'ethyl methyl propanedioate', AUTHOR => 1 }, # BBv2 P-65.6.3.3.2.1 ); @cases = grep { !exists $_->{AUTHOR} } @cases unless $ENV{AUTHOR_TESTING}; plan skip_all => 'No available cases' unless @cases; plan tests => scalar @cases; for my $case (@cases) { my $ok; eval { $ok = is ChemOnomatopist::get_name( $case->{smiles} ), $case->{iupac}, $case->{smiles} }; $@ =~ s/\n$// if $@; fail $case->{smiles} . ": $@" if $@; diag 'test supposed to fail with AUTHOR_TESTING' if $case->{AUTHOR} && $ok; } ChemOnomatopist-0.6.1/t/24_monocycle_names.t000066400000000000000000000013201452012116100207240ustar00rootroot00000000000000#!/usr/bin/perl use strict; use warnings; use Chemistry::OpenSMILES::Parser; use ChemOnomatopist; use ChemOnomatopist::Chain::Monocycle; use Graph::Traversal::DFS; use Test::More; my @cases = sort keys %ChemOnomatopist::Chain::Monocycle::names; plan tests => scalar @cases; for my $SMILES (@cases) { my $ring = $SMILES; $ring =~ s/^(\[?[A-Za-z][a-z]?\]?)/${1}1/; $ring .= 1; my $parser = Chemistry::OpenSMILES::Parser->new; my( $graph ) = $parser->parse( $ring ); $graph->delete_vertices( grep { $_->{symbol} eq 'H' } $graph->vertices ); my $cycle = ChemOnomatopist::Chain::Circular->new( $graph, Graph::Traversal::DFS->new( $graph )->dfs ); is $cycle->backbone_SMILES, $SMILES; } ChemOnomatopist-0.6.1/t/25_alkene_alkyne.t000066400000000000000000000035351452012116100203660ustar00rootroot00000000000000#!/usr/bin/perl use strict; use warnings; use ChemOnomatopist; use Test::More; my @cases = ( # From BBv2 P-15.1.7.2.3 { smiles => 'C=CCCO', iupac => 'but-3-en-1-ol' }, { smiles => 'C=CC(C)C', iupac => '3-methylbut-1-ene' }, { smiles => 'CC=CC(O)CC(C)C', iupac => '6-methylhept-2-en-4-ol' }, # From BBv2 P-31.1.1.2 { smiles => 'C=CC=C', iupac => 'buta-1,3-diene' }, { smiles => 'C=CC=CC=CC=CC', iupac => 'nona-1,3,5,7-tetraene' }, { smiles => 'COCCOCCOCCOCC=C', iupac => '2,5,8,11-tetraoxatetradec-13-ene' }, # BBv2 P-31.1.2.2.2 # From BBv2 P-44.4.1.1 { smiles => 'CC=CC#C', iupac => 'pent-3-en-1-yne' }, { smiles => 'CCC=CC', iupac => 'pent-2-ene' }, { smiles => 'CCOCCOCCOCCOC=C', iupac => '3,6,9,12-tetraoxatetradec-1-ene' }, # From BBv2 P-44.4.1.2 { smiles => 'C1C=CCCCCCCCCCCCCCCCCC1', iupac => 'cycloicosene' }, { smiles => 'C1C#CCCCCCCCCCCCCCCCCC1', iupac => 'cycloicosyne' }, { smiles => 'C1=CCCCCCC=CCCCCCCCCCCC1', iupac => 'cycloicosa-1,8-diene' }, { smiles => 'CCC(=C)CCC', iupac => '3-methylidenehexane' }, # BBv2 P-61.2.1 { smiles => 'C(C)=C(C(C)C1=NC=CC=C1)CCC=C(C)C', iupac => '2-(3-ethylidene-7-methyloct-6-en-2-yl)pyridine' }, # BBv2 P-61.2.4 { smiles => 'CCC=CCC', iupac => 'hex-3-ene' }, # Chain halves are joined by double bond { smiles => 'CC#CCC(=O)CNCC#C', iupac => '1-(prop-2-ynylamino)hex-4-yn-2-one' }, # PubChem 116589377 ); @cases = grep { !exists $_->{AUTHOR} } @cases unless $ENV{AUTHOR_TESTING}; plan skip_all => 'No available cases' unless @cases; plan tests => scalar @cases; for my $case (@cases) { my $ok; eval { $ok = is ChemOnomatopist::get_name( $case->{smiles} ), $case->{iupac}, $case->{smiles} }; $@ =~ s/\n$// if $@; fail $case->{smiles} . ": $@" if $@; diag 'test supposed to fail with AUTHOR_TESTING' if $case->{AUTHOR} && $ok; } ChemOnomatopist-0.6.1/t/26_monospiro.t000066400000000000000000000037021452012116100176060ustar00rootroot00000000000000#!/usr/bin/perl use strict; use warnings; use ChemOnomatopist; use Test::More; my @cases = ( # From BBv2 P-24.2.1 { smiles => 'C1CCCC12CCCCC2', iupac => 'spiro[4.5]decane' }, { smiles => 'C1CCCC12CCCC2', iupac => 'spiro[4.4]nonane' }, # From BBv2 P-24.2.4.1.1 { smiles => 'C1CCCC12OCCCC2', iupac => '6-oxaspiro[4.5]decane' }, { smiles => 'C1CCCC12NCCOC2', iupac => '9-oxa-6-azaspiro[4.5]decane' }, { smiles => 'C1CCCC12CSCNC2', iupac => '7-thia-9-azaspiro[4.5]decane' }, { smiles => 'C1CC2(C1)C(C2(C#N)C#N)(C#N)C#N', iupac => 'spiro[2.3]hexane-1,1,2,2-tetracarbonitrile' }, # PubChem 263661 { smiles => 'CC(C)(C)C1CCC2(C(C1)(CCCO)O)OCCO2', iupac => '8-tert-butyl-6-(3-hydroxypropyl)-1,4-dioxaspiro[4.5]decan-6-ol', AUTHOR => 1 }, # PubChem 496485 { smiles => 'CCCN1C(=O)C(NC(=O)C12CCNCC2)CC(C)C', iupac => '3-(2-methylpropyl)-1-propyl-1,4,9-triazaspiro[5.5]undecane-2,5-dione' }, # PubChem 9856956 { smiles => 'C1CCOC2(C1)CC(=O)CCO2', iupac => '1,7-dioxaspiro[5.5]undecan-4-one' }, # PubChem 11217490 { smiles => 'C1CCC2(C1)CCOC(=O)C2', iupac => '8-oxaspiro[4.5]decan-9-one', AUTHOR => 1 }, # PubChem 12733330 { smiles => 'O1CCOC12CCC(CC2)C(C(C)C)=O', iupac => '1-(1,4-dioxaspiro[4.5]decan-8-yl)-2-methylpropan-1-one' }, # PubChem 13353114 # This might not be a real compound, nevertheless, locants should probably not be added to it { smiles => 'FC1(C(C(C(C(C12C(C(C(C2(F)F)(F)F)(F)F)(F)F)(F)F)(F)F)(F)F)(F)F)F', iupac => 'octadecafluorospiro[4.5]decane', AUTHOR => 1 }, ); @cases = grep { !exists $_->{AUTHOR} } @cases unless $ENV{AUTHOR_TESTING}; plan skip_all => 'No available cases' unless @cases; plan tests => scalar @cases; for my $case (@cases) { my $ok; eval { $ok = is ChemOnomatopist::get_name( $case->{smiles} ), $case->{iupac}, $case->{smiles} }; $@ =~ s/\n$// if $@; fail $case->{smiles} . ": $@" if $@; diag 'test supposed to fail with AUTHOR_TESTING' if $case->{AUTHOR} && $ok; } ChemOnomatopist-0.6.1/t/27_bicycle.t000066400000000000000000000065111452012116100171750ustar00rootroot00000000000000#!/usr/bin/perl use strict; use warnings; use ChemOnomatopist; use Test::More; my @cases = ( # From BBv2 P-25.0 { smiles => 'C1=CC=CC2=CC=CC=C12', iupac => 'naphthalene' }, { smiles => 'C=12C=CC=CC=1C=CC=C2', iupac => 'naphthalene' }, # From BBv2 P-25.2.1 { smiles => 'N1=CN=CC2=NC=CN=C12', iupac => 'pteridine' }, { smiles => 'N1=CN=C2N=CNC2=C1', iupac => 'purine' }, { smiles => 'N1N=CC2=CC=CC=C12', iupac => '1H-indazole' }, { smiles => 'C12=C(C=CN2)C=CC=C1', iupac => '1H-indole' }, { smiles => 'O1CC=CC2=C1C=CC=C2', iupac => '2H-1-benzopyran' }, { smiles => 'C1OC=CC2=C1C=CC=C2', iupac => '1H-2-benzopyran' }, { smiles => 'C1[Se]C=CC2=C1C=CC=C2', iupac => '1H-2-benzoselenopyran' }, { smiles => 'N1=CC=CC2=CC=CC=C12', iupac => 'quinoline' }, { smiles => 'P1C=CC2=CC=CC=C12', iupac => 'phosphindole' }, # Not sure if H prefix is not needed { smiles => 'C1=CC=C2C(=C1)C(=C(P2(Cl)(Cl)Cl)Cl)Cl', iupac => '1,1,1,2,3-pentachlorophosphindole' }, # PubChem 2784508 { smiles => 'C1=CC=C2C(C(C=CC2=C1)O)O', iupac => 'naphthalene-1,2-diol' }, # PubChem 362 has 1,2-dihydronaphthalene-1,2-diol, which is clearly incorrect { smiles => 'C12(C(C(C(C(C1(F)F)(F)F)(F)F)(F)F)(C(C(C(C2(F)F)(F)F)(F)F)(F)F)F)F', iupac => '1,1,2,2,3,3,4,4,4a,5,5,6,6,7,7,8,8,8a-octadecafluoronaphthalene' }, # PubChem 9386 { smiles => 'C1=CC=CC=CC2=CC=CC=CC=C12', iupac => 'octalene' }, # From BBv2 P-25.1.2.3 # From BBv2 P-25.2.2.4 { smiles => 'C1=COC=CC2=C1C=CC=C2', iupac => '3-benzoxepine' }, { smiles => 'O1C=CC2=C1C=CC=C2', iupac => '1-benzofuran' }, { smiles => 'C=1OC=C2C1C=CC=C2', iupac => '2-benzofuran' }, { smiles => 'C1=CC=COC=CC=CC=COC=CC=CC2=C1C=CC=C2', iupac => '5,12-benzodioxacyclooctadecine', AUTHOR => 1 }, { smiles => 'C1=C(C=CC2=CC=CC=C12)CCCO', iupac => '3-(naphthalen-2-yl)propan-1-ol' }, # From P-15.6.1.2 { smiles => 'C1=CC2=C(C=C1O)C(=CN2)CCN', iupac => '3-(2-aminoethyl)-1H-indol-5-ol' }, # serotonin # Guanine { smiles => 'O=C1c2ncnc2nc(N)N1', iupac => '2-amino-1,9-dihydro-6H-purin-6-one', AUTHOR => 1 }, # FIXME: fails due to aromaticity { smiles => 'NC=1NC(C=2N=CNC2N1)=O', iupac => '2-amino-1,9-dihydro-6H-purin-6-one', AUTHOR => 1 }, { smiles => 'N1C(N)=NC=2N=CNC2C1=O', iupac => '2-amino-1,9-dihydro-6H-purin-6-one', AUTHOR => 1 }, { smiles => 'CCN1C=NC2=C(N=CN=C21)N', iupac => '9-ethylpurin-6-amine' }, # PubChem 7 { smiles => 'CCCCCNC1=C(C=NC2=CC=CC=C21)[N+](=O)[O-]', iupac => '3-nitro-N-pentylquinolin-4-amine' }, # PubChem 21355672 { smiles => 'CC1=C(C2=C(C=C1)C=C(S2)O)CO', iupac => '7-(hydroxymethyl)-6-methyl-1-benzothiophen-2-ol' }, # PubChem 130803523 { smiles => 'COC1=C(C2=C(C=C1)SC(=N2)N)Br', iupac => '4-bromo-5-methoxy-1,3-benzothiazol-2-amine' }, # PubChem 131985781 { smiles => 'C1(=CC=C(C=2C(=CC=C(C12)C(=O)O)C(=O)O)C(=O)O)C(=O)O', iupac => 'naphthalene-1,4,5,8-tetracarboxylic acid' }, ); @cases = grep { !exists $_->{AUTHOR} } @cases unless $ENV{AUTHOR_TESTING}; plan skip_all => 'No available cases' unless @cases; plan tests => scalar @cases; for my $case (@cases) { my $ok; eval { $ok = is ChemOnomatopist::get_name( $case->{smiles} ), $case->{iupac}, $case->{smiles} }; $@ =~ s/\n$// if $@; fail $case->{smiles} . ": $@" if $@; diag 'test supposed to fail with AUTHOR_TESTING' if $case->{AUTHOR} && $ok; } ChemOnomatopist-0.6.1/t/28_cyanide.t000066400000000000000000000024751452012116100172050ustar00rootroot00000000000000#!/usr/bin/perl use strict; use warnings; use ChemOnomatopist; use Test::More; my @cases = ( # From BBv2 P-66.5.1.1.1 { smiles => 'C(CCCCC)#N', iupac => 'hexanenitrile', AUTHOR => 1 }, { smiles => 'C(CCCC#N)#N', iupac => 'pentanedinitrile', AUTHOR => 1 }, { smiles => 'C(CCC)(C#N)(C#N)C#N', iupac => 'butane-1,1,1-tricarbonitrile' }, # BBv2 P-66.5.1.1.2 # From BBv2 P-66.5.1.1.3 { smiles => '[SiH3]C#N', iupac => 'silanecarbonitrile' }, { smiles => 'C1(CCCCC1)C#N', iupac => 'cyclohexanecarbonitrile' }, { smiles => 'N1(CCCCC1)C#N', iupac => 'piperidine-1-carbonitrile' }, # From BBv2 P-66.5.1.1.4 { smiles => 'C(#N)C1=CC=C(O1)C(=O)O', iupac => '5-cyanofuran-2-carboxylic acid' }, { smiles => 'C(#N)CCC(=O)O', iupac => '3-cyanopropanoic acid' }, { smiles => 'C(#N)CC(CCC#N)CCC#N', iupac => '4-(cyanomethyl)heptanedinitrile', AUTHOR => 1 }, ); @cases = grep { !exists $_->{AUTHOR} } @cases unless $ENV{AUTHOR_TESTING}; plan skip_all => 'No available cases' unless @cases; plan tests => scalar @cases; for my $case (@cases) { my $ok; eval { $ok = is ChemOnomatopist::get_name( $case->{smiles} ), $case->{iupac}, $case->{smiles} }; $@ =~ s/\n$// if $@; fail $case->{smiles} . ": $@" if $@; diag 'test supposed to fail with AUTHOR_TESTING' if $case->{AUTHOR} && $ok; } ChemOnomatopist-0.6.1/t/29_nonfused_cycles.t000066400000000000000000000052671452012116100207570ustar00rootroot00000000000000#!/usr/bin/perl use strict; use warnings; use ChemOnomatopist; use Test::More; my @cases = ( { smiles => 'ClC1=NC=CC(=C1)OC1=NC=C(C=C1)Cl', iupac => '2-chloro-4-[(5-chloropyridin-2-yl)oxy]pyridine', AUTHOR => 1 }, # BBv2 P-15.3.2.4.2 { smiles => 'C(C1=CC=CC=C1)C1=NC=CC=C1', iupac => '2-benzylpyridine' }, # BBv2 P-29.6.1 { smiles => 'SC1=CC=C(C=C1)SSC=1C=C(C=CC1)S', iupac => '3-[(4-sulfanylphenyl)disulfanyl]benzene-1-thiol', AUTHOR => 1 }, # BBv2 P-63.1.5 { smiles => 'C1(=CC=CC=C1)SC1CCNCC1', iupac => '4-(phenylsulfanyl)piperidine', AUTHOR => 1 }, # BBv2 P-63.2.5 # From BBv2 P-63.2.4.2 { smiles => 'C1(CCCCC1)OC1=CC=CC=C1', iupac => '(cyclohexyloxy)benzene', AUTHOR => 1 }, # FIXME: Very close { smiles => 'N1=CC(=CC=C1)OC1=NC=CN=C1', iupac => '2-[(pyridin-3-yl)oxy]pyrazine' }, { smiles => 'c1ncccc1[C@@H]2CCCN2C', iupac => '3-[(2S)-1-methylpyrrolidin-2-yl]pyridine', AUTHOR => 1 }, # nicotine { smiles => 'C1=CSC(=C1)SSC2=CC=CS2', iupac => '2-(thiophen-2-yldisulfanyl)thiophene' }, # PubChem 23347 { smiles => 'CC1C(=O)NC(C(=O)N1C(C)C(C)C)C2CCCCC2', iupac => '3-cyclohexyl-6-methyl-1-(3-methylbutan-2-yl)piperazine-2,5-dione' }, # PubChem 64959818 { smiles => 'C1=CC(=C(N=C1)C2=NC(=NS2)N)Br', iupac => '5-(3-bromopyridin-2-yl)-1,2,4-thiadiazol-3-amine' }, # PubChem 107526369 { smiles => 'C1=CC(=NC(=C1N)C2=CC(=C(C=C2Cl)Cl)Cl)C(=O)O', iupac => '5-amino-6-(2,4,5-trichlorophenyl)pyridine-2-carboxylic acid', AUTHOR => 1 }, # PubChem 133086582 { smiles => 'CC1CCC(CC(C1)C)C2CCCCC2', iupac => '1-cyclohexyl-3,5-dimethylcycloheptane' }, # PubChem 149225482 { smiles => 'CC1=CC(=C(C=C1)C(C)(C)C2=CC(=C(C=C2)C)C)C', iupac => '1-[2-(3,4-dimethylphenyl)propan-2-yl]-2,4-dimethylbenzene' }, # PubChem 54559144 { smiles => 'CCOC1=C(C(=CC(=C1)CNCC2=NN=C(N2C)C)Cl)OC', iupac => '1-(3-chloro-5-ethoxy-4-methoxyphenyl)-N-[(4,5-dimethyl-1,2,4-triazol-3-yl)methyl]methanamine', AUTHOR => 1 }, # PubChem 56822512 # incorrectly selected parent chain { smiles => 'C1CC1=CC2=CC=CC=C2Cl', iupac => '1-chloro-2-(cyclopropylidenemethyl)benzene' }, # PubChem 54594307 { smiles => 'COC1=NN=C(C=C1C(=O)O)C2=CC=CC=N2', iupac => '3-methoxy-6-pyridin-2-ylpyridazine-4-carboxylic acid', AUTHOR => 1 }, # PubChem 117127049 # differs in brackets ); @cases = grep { !exists $_->{AUTHOR} } @cases unless $ENV{AUTHOR_TESTING}; plan skip_all => 'No available cases' unless @cases; plan tests => scalar @cases; for my $case (@cases) { my $ok; eval { $ok = is ChemOnomatopist::get_name( $case->{smiles} ), $case->{iupac}, $case->{smiles} }; $@ =~ s/\n$// if $@; fail $case->{smiles} . ": $@" if $@; diag 'test supposed to fail with AUTHOR_TESTING' if $case->{AUTHOR} && $ok; } ChemOnomatopist-0.6.1/t/30_guanidine.t000066400000000000000000000020361452012116100175160ustar00rootroot00000000000000#!/usr/bin/perl use strict; use warnings; use ChemOnomatopist; use Test::More; my @cases = ( { smiles => 'NC(=N)N', iupac => 'guanidine' }, # From BBv2 P-66.4.1.2.1.2 { smiles => 'CN(C(=NC1=CC=CC=C1)N(C)C)C', iupac => "N,N,N',N'-tetramethyl-N''-phenylguanidine" }, { smiles => 'CN(C(=N)NC)C', iupac => "N,N,N'-trimethylguanidine" }, { smiles => 'NC(N)=NCCCC(=O)O', iupac => '4-[(diaminomethylidene)amino]butanoic acid' }, # From BBv2 P-66.4.1.2.1.3 { smiles => 'NC(C(=O)O)CCCNC(N)=N', iupac => '2-amino-5-(carbamimidoylamino)pentanoic acid' }, # From BBv2 P-103.1.1.1, arginine ); @cases = grep { !exists $_->{AUTHOR} } @cases unless $ENV{AUTHOR_TESTING}; plan skip_all => 'No available cases' unless @cases; plan tests => scalar @cases; for my $case (@cases) { my $ok; eval { $ok = is ChemOnomatopist::get_name( $case->{smiles} ), $case->{iupac}, $case->{smiles} }; $@ =~ s/\n$// if $@; fail $case->{smiles} . ": $@" if $@; diag 'test supposed to fail with AUTHOR_TESTING' if $case->{AUTHOR} && $ok; } ChemOnomatopist-0.6.1/t/31_ethers.t000066400000000000000000000023631452012116100170510ustar00rootroot00000000000000#!/usr/bin/perl use strict; use warnings; use ChemOnomatopist; use Test::More; my @cases = ( # From BBv2 P-63.2.4.1 { smiles => 'COC', iupac => 'methoxymethane' }, { smiles => 'CCOC', iupac => 'methoxyethane' }, { smiles => 'C1(=CC=CC=C1)OC', iupac => 'anisole' }, { smiles => 'COC1=CC2=CC=CC=C2C=C1', iupac => '2-methoxynaphthalene' }, { smiles => 'ClCCOCC', iupac => '1-chloro-2-ethoxyethane', AUTHOR => 1 }, { smiles => 'COCCOC', iupac => '1,2-dimethoxyethane', AUTHOR => 1 }, { smiles => 'COCCOCCOC', iupac => '1-methoxy-2-(2-methoxyethoxy)ethane', AUTHOR => 1 }, # From P-63.2.5 { smiles => 'CSC1=CC=CC=C1', iupac => '(methylsulfanyl)benzene', AUTHOR => 1 }, { smiles => 'ClC1=CC=C(C=C1)[Se]CCl', iupac => '1-chloro-4-[(chloromethyl)selanyl]benzene', AUTHOR => 1 }, ); @cases = grep { !exists $_->{AUTHOR} } @cases unless $ENV{AUTHOR_TESTING}; plan skip_all => 'No available cases' unless @cases; plan tests => scalar @cases; for my $case (@cases) { my $ok; eval { $ok = is ChemOnomatopist::get_name( $case->{smiles} ), $case->{iupac}, $case->{smiles} }; $@ =~ s/\n$// if $@; fail $case->{smiles} . ": $@" if $@; diag 'test supposed to fail with AUTHOR_TESTING' if $case->{AUTHOR} && $ok; } ChemOnomatopist-0.6.1/t/32_amides.t000066400000000000000000000106021452012116100170150ustar00rootroot00000000000000#!/usr/bin/perl use strict; use warnings; use ChemOnomatopist; use Test::More; my @cases = ( { smiles => 'C(C)N(C(=O)C=1OC=CC1)CC', iupac => 'N,N-diethylfuran-2-carboxamide' }, # From BBv2 P-16.2.1 # From BBv2 P-62.2.3 { smiles => 'N(C1=CC=CC=C1)C=1C=C(C(=O)O)C=CC1', iupac => '3-anilinobenzoic acid', AUTHOR => 1 }, { smiles => 'CN(C1=CC=CC=C1)C=1C=C(C=CC1)O', iupac => '3-(N-methylanilino)phenol', AUTHOR => 1 }, { smiles => 'S(S)C=1C=C(C(=O)N)C=CC1SS', iupac => '3,4-bis(disulfanyl)benzamide', AUTHOR => 1 }, # From BBv2 P-63.4.2.2 { smiles => 'ONC(=O)C1CCCCC1', iupac => 'N-hydroxycyclohexanecarboxamide' }, # From BBv2 P-65.1.3.4 # From BBv2 P-66.1.1.1.1.1 { smiles => 'C(CCCCC)(=O)N', iupac => 'hexanamide' }, { smiles => 'C(CCCC(=O)N)(=O)N', iupac => 'pentanediamide', AUTHOR => 1 }, { smiles => 'C(C(CC(=O)N)C(=O)N)C(=O)N', iupac => 'propane-1,2,3-tricarboxamide', AUTHOR => 1 }, # BBv2 P-66.1.1.1.1.2 # From BBv2 P-66.1.1.1.1.3 { smiles => 'PC(=O)N', iupac => 'phosphanecarboxamide', AUTHOR => 1 }, { smiles => 'N(N)C(=O)N', iupac => 'hydrazinecarboxamide', AUTHOR => 1 }, { smiles => 'S1C(=CC=C1)C(=O)N', iupac => 'thiophene-2-carboxamide' }, { smiles => 'N1(CCCCC1)C(=O)N', iupac => 'piperidine-1-carboxamide' }, { smiles => 'C(C1=CC=CC=C1)(=O)N', iupac => 'benzamide' }, # BBv2 P-66.1.1.1.2.1 # From BBv2 P-66.1.1.1.2.4 { smiles => 'C(C=C)(=O)N', iupac => 'prop-2-enamide' }, { smiles => 'OC(C(=O)N)C', iupac => '2-hydroxypropanamide' }, # From BBv2 P-66.1.1.3.1.1 { smiles => 'CNC(C1=CC=CC=C1)=O', iupac => 'N-methylbenzamide' }, { smiles => 'C(C)N(C(=O)C=1OC=CC1)CC', iupac => 'N,N-diethylfuran-2-carboxamide' }, # From BBv2 P-66.1.1.3.4 { smiles => 'CN(C(C1=CC=CC=C1)=O)C1=CC=CC=C1', iupac => 'N-methyl-N-phenylbenzamide' }, { smiles => 'CN(C(C1=CC=C(C=C1)C)=O)C1=CC(=CC=C1)C', iupac => 'N,4-dimethyl-N-(3-methylphenyl)benzamide' }, # From BBv2 P-66.1.1.3.5 { smiles => 'N1=CC=C(C=C1)C1=CC=C(C(=O)N)C=C1', iupac => '4-(pyridin-4-yl)benzamide' }, { smiles => 'ClC1=NC=CC=C1C(=O)N', iupac => '2-chloropyridine-3-carboxamide' }, { smiles => 'CC=1C=C(C(=CC1)C(=O)N)C(=O)N', iupac => '4-methylbenzene-1,2-dicarboxamide', AUTHOR => 1 }, { smiles => 'OC1=C(C(=O)N)C=CC=C1', iupac => '2-hydroxybenzamide' }, { smiles => 'NC=1C(=NC(=C(N1)N)Cl)C(=O)N', iupac => '3,5-diamino-6-chloropyrazine-2-carboxamide', AUTHOR => 1 }, { smiles => 'C(CNC1C(CCCC1)C(=O)N)NC1C(CCCC1)C(=O)N', iupac => '2,2\'-[ethane-1,2-diylbis(azanediyl)]di(cyclohexane-1-carboxamide)', AUTHOR => 1 }, { smiles => 'C(C1=CC=CC=C1)(=O)NC1=CC=C(C=C1)S(=O)(=O)O', iupac => '4-benzamidobenzene-1-sulfonic acid' }, # BBv2 P-66.1.1.4.3 # From BBv2 P-66.1.3 { smiles => 'N1(CCCCC1)C(C)=O', iupac => '1-(piperidin-1-yl)ethan-1-one' }, { smiles => 'N1(CCCC2=CC=CC=C12)C(CC)=O', iupac => '1-(3,4-dihydroquinolin-1(2H)-yl)propan-1-one', AUTHOR => 1 }, { smiles => 'CCC(=O)N(C1CCN(CC1)CCC2=CC=CC=C2)C3=CC=CC=C3', iupac => 'N-phenyl-N-[1-(2-phenylethyl)piperidin-4-yl]propanamide' }, # PubChem 3345 { smiles => 'CC(CCOC)NC(=O)CCCCCCN', iupac => '7-amino-N-(4-methoxybutan-2-yl)heptanamide', AUTHOR => 1 }, # PubChem 64604850 { smiles => 'C1CCN(CC1)CCCCC(=O)N=C(CC(=N)C2=CC=NC=C2)N', iupac => 'N-(1-amino-3-imino-3-pyridin-4-ylpropylidene)-5-piperidin-1-ylpentanamide', AUTHOR => 1 }, # PubChem 90937303 { smiles => 'C1CCC(CC1)N2C=C(C(=N2)C(=O)N)N', iupac => '4-amino-1-cyclohexylpyrazole-3-carboxamide' }, # PubChem 107345270 { smiles => 'CC1(CC1(Cl)Cl)C(=O)NC2=CC=CC(=C2)C3=NN=CO3', iupac => '2,2-dichloro-1-methyl-N-[3-(1,3,4-oxadiazol-2-yl)phenyl]cyclopropane-1-carboxamide' }, # PubChem 43061667 { smiles => 'C1CC1N(CC2=CN=CC=C2)C(=O)C3CSCN3', iupac => 'N-cyclopropyl-N-(pyridin-3-ylmethyl)-1,3-thiazolidine-4-carboxamide' }, # PubChem 54869854 { smiles => 'C(C)(=O)N(C(C1=CC=CC=C1)=O)C(CCCl)=O', iupac => 'N-acetyl-N-(3-chloropropanoyl)benzamide', AUTHOR => 1 }, # BBv2 P-66.1.2.1 ); @cases = grep { !exists $_->{AUTHOR} } @cases unless $ENV{AUTHOR_TESTING}; plan skip_all => 'No available cases' unless @cases; plan tests => scalar @cases; for my $case (@cases) { my $ok; eval { $ok = is ChemOnomatopist::get_name( $case->{smiles} ), $case->{iupac}, $case->{smiles} }; $@ =~ s/\n$// if $@; fail $case->{smiles} . ": $@" if $@; diag 'test supposed to fail with AUTHOR_TESTING' if $case->{AUTHOR} && $ok; } ChemOnomatopist-0.6.1/t/33_hydrazines.t000066400000000000000000000022241452012116100177350ustar00rootroot00000000000000#!/usr/bin/perl use strict; use warnings; use ChemOnomatopist; use Test::More; my @cases = ( # From BBv2 P-68.3.1.2.1 { smiles => 'CN(N)C', iupac => '1,1-dimethylhydrazine' }, { smiles => 'C1(=CC=CC=C1)NN', iupac => 'phenylhydrazine' }, { smiles => 'N(N)CN' => iupac => '1-hydrazinylmethanamine' }, { smiles => 'N(N)C(=O)O', iupac => 'hydrazinecarboxylic acid', AUTHOR => 1 }, { smiles => 'FN(N(F)F)F', iupac => 'tetrafluorohydrazine' }, { smiles => 'N(N)CC#N', iupac => 'hydrazinylacetonitrile', AUTHOR => 1 }, # From BBv2 P-68.3.1.2.2 { smiles => 'C(CC)=NN', iupac => 'propylidenehydrazine' }, { smiles => 'CN(N=C(C)C)C', iupac => '1,1-dimethyl-2-(propan-2-ylidene)hydrazine' }, ); @cases = grep { !exists $_->{AUTHOR} } @cases unless $ENV{AUTHOR_TESTING}; plan skip_all => 'No available cases' unless @cases; plan tests => scalar @cases; for my $case (@cases) { my $ok; eval { $ok = is ChemOnomatopist::get_name( $case->{smiles} ), $case->{iupac}, $case->{smiles} }; $@ =~ s/\n$// if $@; fail $case->{smiles} . ": $@" if $@; diag 'test supposed to fail with AUTHOR_TESTING' if $case->{AUTHOR} && $ok; } ChemOnomatopist-0.6.1/t/34_sulfoxides.t000066400000000000000000000015431452012116100177460ustar00rootroot00000000000000#!/usr/bin/perl use strict; use warnings; use ChemOnomatopist; use Test::More; my @cases = ( # From BBv2 P-63.6 { smiles => 'C(C)S(=O)CCCC', iupac => '1-(ethanesulfinyl)butane' }, { smiles => 'C(C)[Se](=O)C1=CC=CC=C1', iupac => '(ethaneseleninyl)benzene', AUTHOR => 1 }, { smiles => 'C1(CCCCC1)S(=O)N1CC(OCC1)C(=O)O', iupac => '4-(cyclohexanesulfinyl)morpholine-2-carboxylic acid' }, # BBv2 P-65.4.1 ); @cases = grep { !exists $_->{AUTHOR} } @cases unless $ENV{AUTHOR_TESTING}; plan skip_all => 'No available cases' unless @cases; plan tests => scalar @cases; for my $case (@cases) { my $ok; eval { $ok = is ChemOnomatopist::get_name( $case->{smiles} ), $case->{iupac}, $case->{smiles} }; $@ =~ s/\n$// if $@; fail $case->{smiles} . ": $@" if $@; diag 'test supposed to fail with AUTHOR_TESTING' if $case->{AUTHOR} && $ok; } ChemOnomatopist-0.6.1/t/35_acids.t000066400000000000000000000021021452012116100166350ustar00rootroot00000000000000#!/usr/bin/perl use strict; use warnings; use ChemOnomatopist; use Test::More; my @cases = ( { smiles => 'C(O)S(=O)O', iupac => 'hydroxymethanesulfinic acid' }, # PubChem 9000 { smiles => 'S(=O)(O)CC(=O)O', iupac => 'sulfinoacetic acid' }, # From BBv2 P-65.3.2.1 # From BBv2 P-65.3.1 { smiles => 'C1(=CC=CC=C1)S(=O)(=O)O', iupac => 'benzenesulfonic acid' }, { smiles => 'CC(CC)S(=O)O', iupac => 'butane-2-sulfinic acid' }, { smiles => 'CC1=C(C=C(C=C1)S(=O)(=O)O)S(=O)(=O)O', iupac => '4-methylbenzene-1,3-disulfonic acid' }, { smiles => 'NC1=CC=C(C=C1)S(=O)(=O)O', iupac => '4-aminobenzene-1-sulfonic acid' }, ); @cases = grep { !exists $_->{AUTHOR} } @cases unless $ENV{AUTHOR_TESTING}; plan skip_all => 'No available cases' unless @cases; plan tests => scalar @cases; for my $case (@cases) { my $ok; eval { $ok = is ChemOnomatopist::get_name( $case->{smiles} ), $case->{iupac}, $case->{smiles} }; $@ =~ s/\n$// if $@; fail $case->{smiles} . ": $@" if $@; diag 'test supposed to fail with AUTHOR_TESTING' if $case->{AUTHOR} && $ok; } ChemOnomatopist-0.6.1/t/36_acyl_halides.t000066400000000000000000000020231452012116100201760ustar00rootroot00000000000000#!/usr/bin/perl use strict; use warnings; use ChemOnomatopist; use Test::More; my @cases = ( # From BBv2 P-65.5.1 { smiles => 'C(CCCCC)(=O)F', iupac => 'hexanoyl fluoride' }, { smiles => 'C(CC(=O)Cl)(=O)Cl', iupac => 'propanedioyl dichloride', AUTHOR => 1 }, { smiles => 'C1(=CC=C(C=C1)C(=O)Cl)C(=O)Cl', iupac => 'benzene-1,4-dicarbonyl dichloride', AUTHOR => 1 }, { smiles => 'C(CCC(=O)Cl)(=O)Br', iupac => 'butanedioyl bromide chloride', AUTHOR => 1 }, { smiles => 'C(C(=O)F)(C(F)(F)F)O', iupac => '3,3,3-trifluoro-2-hydroxypropanoyl fluoride' }, # PubChem 53938350 ); @cases = grep { !exists $_->{AUTHOR} } @cases unless $ENV{AUTHOR_TESTING}; plan skip_all => 'No available cases' unless @cases; plan tests => scalar @cases; for my $case (@cases) { my $ok; eval { $ok = is ChemOnomatopist::get_name( $case->{smiles} ), $case->{iupac}, $case->{smiles} }; $@ =~ s/\n$// if $@; fail $case->{smiles} . ": $@" if $@; diag 'test supposed to fail with AUTHOR_TESTING' if $case->{AUTHOR} && $ok; } ChemOnomatopist-0.6.1/t/37_fusion.t000066400000000000000000000050551452012116100170710ustar00rootroot00000000000000#!/usr/bin/perl use strict; use warnings; use ChemOnomatopist; use Test::More; my @cases = ( # From BBv2 P-25.3.1.3 { smiles => '[Se]1C=CC2=C1[Se]C=C2', iupac => 'selenopheno[2,3-b]selenophene' }, { smiles => '[Se]1C=2C(C=C1)=C[Se]C2', iupac => 'selenopheno[3,4-b]selenophene', AUTHOR => 1 }, { smiles => '[Se]1C2=C(C=C1)[Se]C=C2', iupac => 'selenopheno[3,2-b]selenophene' }, # From BBv2 P-25.3.2.4 { smiles => 'S1CC=CSC=2C1=COC2', iupac => '2H-[1,4]dithiepino[2,3-c]furan', AUTHOR => 1 }, { smiles => 'O1CC=C2OC=CC=C21', iupac => '2H-furo[3,2-b]pyran', AUTHOR => 1 }, { smiles => 'N1=CC=CC2=C1C=NOC2', iupac => '5H-pyrido[2,3-d][1,2]oxazine', AUTHOR => 1 }, { smiles => 'O1COC2=C1C=CO2', iupac => '2H-furo[2,3-d][1,3]dioxole' }, { smiles => 'O1P=CC2=C1OCO2', iupac => '5H-[1,3]dioxolo[4,5-d][1,2]oxaphosphole' }, { smiles => 'S1C=NC2=C1N=C[Se]2', iupac => '[1,3]selenazolo[5,4-d][1,3]thiazole' }, { smiles => 'O1C2=C(SC=C1)[Se]C=CO2', iupac => '[1,4]oxaselenino[2,3-b][1,4]oxathiine' }, { smiles => 'N1=CC=NC=2C1=CN=NC2', iupac => 'pyrazino[2,3-d]pyridazine' }, { smiles => 'O1SNC2=C1ONS2', iupac => '3H,5H-[1,3,2]oxathiazolo[4,5-d][1,2,3]oxathiazole' }, { smiles => 'S1C=2N(C=C1)C=CN2', iupac => 'imidazo[2,1-b][1,3]thiazole' }, # From BBv2 P-25.3.2.5.1 # From BBv2 P-25.3.3.1.2 { smiles => 'O1C=2C(=CC=C1)C=CC2', iupac => 'cyclopenta[b]pyran', AUTHOR => 1 }, { smiles => 'S1COC=2NC=CC21', iupac => '2H,4H-[1,3]oxathiolo[5,4-b]pyrrole', AUTHOR => 1 }, { smiles => 'O1C2=C(C=C1)C=CS2', iupac => 'thieno[2,3-b]furan' }, { smiles => 'N1C=NC2=C1C=CS2', iupac => '1H-thieno[2,3-d]imidazole', AUTHOR => 1 }, { smiles => 'N=1N2C(N=CC1)=NC=C2', iupac => 'imidazo[1,2-b][1,2,4]triazine' }, { smiles => 'O1COC2=C1N=CN2', iupac => '2H,4H-[1,3]dioxolo[4,5-d]imidazole', AUTHOR => 1 }, { smiles => 'CN(C=1C2=C(N=CN1)NC=C2)C', iupac => 'N,N-dimethyl-7H-pyrrolo[2,3-d]pyrimidin-4-amine', AUTHOR => 1 }, # PubChem 1861 { smiles => 'C1(=CC=CC=C1)C1=CC2=C(N=CN=C2N)N1C1=CC=CC=C1', iupac => '6,7-diphenylpyrrolo[2,3-d]pyrimidin-4-amine', AUTHOR => 1 }, # PubChem 49845043 ); @cases = grep { !exists $_->{AUTHOR} } @cases unless $ENV{AUTHOR_TESTING}; plan skip_all => 'No available cases' unless @cases; plan tests => scalar @cases; for my $case (@cases) { my $ok; eval { $ok = is ChemOnomatopist::get_name( $case->{smiles} ), $case->{iupac}, $case->{smiles} }; $@ =~ s/\n$// if $@; fail $case->{smiles} . ": $@" if $@; diag 'test supposed to fail with AUTHOR_TESTING' if $case->{AUTHOR} && $ok; } ChemOnomatopist-0.6.1/t/38_xanthenes.t000066400000000000000000000040061452012116100175570ustar00rootroot00000000000000#!/usr/bin/perl use strict; use warnings; use ChemOnomatopist; use Test::More; my @cases = ( # From BBv2 P-25.2.1 { smiles => 'C1=CC=CC2=NC3=CC=CC=C3C=C12', iupac => 'acridine' }, { smiles => 'C1=CC=CC=2OC3=CC=CC=C3CC12', iupac => '9H-xanthene' }, # From BBv2 P-25.2.2.2 { smiles => 'C1=CC=CC=2OC3=CC=CC=C3OC12', iupac => 'oxanthrene' }, { smiles => 'C1=CC=CC2=NC3=CC=CC=C3N=C12', iupac => 'phenazine' }, { smiles => 'C1=CC=CC=2OC3=CC=CC=C3C(C12)=O', iupac => '9H-xanthen-9-one' }, # From Wikipedia Xanthone { smiles => 'OC1=CC(=CC=2OC3=CC(=CC(=C3C(C12)=O)C)OC)OC', iupac => '1-hydroxy-3,6-dimethoxy-8-methyl-9H-xanthen-9-one' }, # From Wikipedia Lichexanthone { smiles => 'C1=CC=CC=2SC3=CC=CC=C3C(C12)=O', iupac => '9H-thioxanthen-9-one' }, # From Wikipedia Thioxanthone { smiles => 'CC(C)C1=CC=CC=2C(C3=CC=CC=C3SC12)=O', iupac => '4-(propan-2-yl)-9H-thioxanthen-9-one' }, # From Wikipedia Isopropylthioxanthone { smiles => 'ClC1=CC=2OC3=CC(=C(C=C3OC2C=C1Cl)Cl)Cl', iupac => '2,3,7,8-tetrachlorooxanthrene', AUTHOR => 1 }, # From Wikipedia 2,3,7,8-Tetrachlorodibenzodioxin { smiles => 'ClC=1C(=C(C(=C2OC=3C(=C(C(=C(C3OC12)Cl)Cl)Cl)Cl)Cl)Cl)Cl', iupac => 'octachlorooxanthrene', AUTHOR => 1 }, # From Wikipedia Octachlorodibenzodioxin { smiles => 'ClC1=C(C(=C(C=2OC3=C(C(=C(C=C3OC12)Cl)Cl)Cl)Cl)Cl)Cl', iupac => '1,2,3,4,6,7,8-heptachlorooxanthrene', AUTHOR => 1 }, # From Wikipedia Heptachlorodibenzo-p-dioxin { smiles => 'C1CCN(CC1)CCCC2C3=CC=CC=C3SC4=CC=CC=C24', iupac => '1-[3-(9H-thioxanthen-9-yl)propyl]piperidine' }, # PubChem 155569813 ); @cases = grep { !exists $_->{AUTHOR} } @cases unless $ENV{AUTHOR_TESTING}; plan skip_all => 'No available cases' unless @cases; plan tests => scalar @cases; for my $case (@cases) { my $ok; eval { $ok = is ChemOnomatopist::get_name( $case->{smiles} ), $case->{iupac}, $case->{smiles} }; $@ =~ s/\n$// if $@; fail $case->{smiles} . ": $@" if $@; diag 'test supposed to fail with AUTHOR_TESTING' if $case->{AUTHOR} && $ok; } ChemOnomatopist-0.6.1/t/39_polyacenes.t000066400000000000000000000015341452012116100177300ustar00rootroot00000000000000#!/usr/bin/perl use strict; use warnings; use ChemOnomatopist; use Test::More; my @cases = ( # From BBv2 P-25.1.2.1 { smiles => 'C1=CC=CC2=CC3=CC4=CC=CC=C4C=C3C=C12', iupac => 'tetracene' }, { smiles => 'C1=CC=CC2=CC3=CC4=CC5=CC=CC=C5C=C4C=C3C=C12', iupac => 'pentacene' }, { smiles => 'CC1=C2C=C3C=CC=CC3=CC2=CC2=CC3=CC=CC=C3C=C12', iupac => '6-methylpentacene' }, # From Wikipedia Pentacene ); @cases = grep { !exists $_->{AUTHOR} } @cases unless $ENV{AUTHOR_TESTING}; plan skip_all => 'No available cases' unless @cases; plan tests => scalar @cases; for my $case (@cases) { my $ok; eval { $ok = is ChemOnomatopist::get_name( $case->{smiles} ), $case->{iupac}, $case->{smiles} }; $@ =~ s/\n$// if $@; fail $case->{smiles} . ": $@" if $@; diag 'test supposed to fail with AUTHOR_TESTING' if $case->{AUTHOR} && $ok; } ChemOnomatopist-0.6.1/t/40_polyaphenes.t000066400000000000000000000021071452012116100201020ustar00rootroot00000000000000#!/usr/bin/perl use strict; use warnings; use ChemOnomatopist; use Test::More; my @cases = ( { smiles => 'OC1=C2C(C=3C=CC=CC3C(C2=C2C(C3=CC=CC=C3C(C2=C1O)=O)=O)=O)=O', iupac => '6,7-dihydroxypentaphene-5,8,13,14-tetrone' }, # PubChem 5379520 { smiles => 'CC1=C2C(C=3C=CC=CC3C(C2=C2C(C3=CC=CC=C3C(C2=C1)=O)=O)=O)=O', iupac => '6-methylpentaphene-5,8,13,14-tetrone' }, # PubChem 93949195 { smiles => 'FC1=C(C2=C(C3=C4C(=C5C(=C6C(=C(C(=C(C6=C(C5=C(C4=C(C(=C3C(=C2C(=C1F)F)F)F)F)F)F)F)F)F)F)F)F)F)C1=CC=CC=C1', iupac => '2,3,4,5,6,7,8,9,10,11,12,13,14,15,16-pentadecafluoro-1-phenylhexaphene', AUTHOR => 1 }, # PubChem 121333830 - flaky ); @cases = grep { !exists $_->{AUTHOR} } @cases unless $ENV{AUTHOR_TESTING}; plan skip_all => 'No available cases' unless @cases; plan tests => scalar @cases; for my $case (@cases) { my $ok; eval { $ok = is ChemOnomatopist::get_name( $case->{smiles} ), $case->{iupac}, $case->{smiles} }; $@ =~ s/\n$// if $@; fail $case->{smiles} . ": $@" if $@; diag 'test supposed to fail with AUTHOR_TESTING' if $case->{AUTHOR} && $ok; } ChemOnomatopist-0.6.1/t/41_isotopes.t000066400000000000000000000025131452012116100174220ustar00rootroot00000000000000#!/usr/bin/perl use strict; use warnings; use ChemOnomatopist; use Test::More; my @cases = ( # From BBv2 P-82.2.1 { smiles => '[14CH4]', iupac => '(14C)methane' }, { smiles => 'C[2H]', iupac => '(2H1)methane' }, { smiles => 'C1(=CC=CC=C1)[13C]([13CH3])=O', iupac => '1-phenyl(1,2-13C2)ethan-1-one' }, { smiles => '[13CH3]C1=C(C=CC=C1)[13CH3]', iupac => '1,2-di[(13C)methyl]benzene', AUTHOR => 1 }, { smiles => '[13CH3]C1=[13CH]C=CC=C1', iupac => '1-(13C)methyl(2-13C)benzene', AUTHOR => 1 }, { smiles => 'N[14CH2]C1(CCCC1)O', iupac => '1-[amino(14C)methyl]cyclopentan-1-ol' }, { smiles => 'S1C([14CH2]CC1)C1=CC=NC=C1', iupac => '4-[(3-14C)thiolan-2-yl]pyridine' }, # From BBv2 P-82.2.2.1 { smiles => '[13CH3]C1=NC=CC=C1C', iupac => '2-(13C)methyl-3-methylpyridine' }, { smiles => 'C(C([2H])[2H])C(CO)C(CCC)CC', iupac => '2-(2,2-2H2)ethyl-3-ethylhexan-1-ol' }, ); @cases = grep { !exists $_->{AUTHOR} } @cases unless $ENV{AUTHOR_TESTING}; plan skip_all => 'No available cases' unless @cases; plan tests => scalar @cases; for my $case (@cases) { my $ok; eval { $ok = is ChemOnomatopist::get_name( $case->{smiles} ), $case->{iupac}, $case->{smiles} }; $@ =~ s/\n$// if $@; fail $case->{smiles} . ": $@" if $@; diag 'test supposed to fail with AUTHOR_TESTING' if $case->{AUTHOR} && $ok; } ChemOnomatopist-0.6.1/t/42_phenanthrenes.t000066400000000000000000000022541452012116100204220ustar00rootroot00000000000000#!/usr/bin/perl use strict; use warnings; use ChemOnomatopist; use Test::More; my @cases = ( { smiles => 'C1=CC=CC=2C3=CC=CC=C3C=CC12', iupac => 'phenanthrene' }, # From BBv2 P-25.2.1 { smiles => 'N1=CC=CC2=CC=C3N=CC=CC3=C12', iupac => '1,7-phenanthroline' }, { smiles => 'C1=CC=CC2=NC=C3C=CC=CC3=C12', iupac => 'phenanthridine' }, { smiles => 'COC=1C(=CC=2C=C(C3=CC(=CC=C3C2C1OC)O)OC)O', iupac => '3,4,9-trimethoxyphenanthrene-2,7-diol' }, # From Wikipedia Gymnopusin { smiles => 'C1=CC=CC=2C3=CC=CC=C3C(C(C12)=O)=O', iupac => 'phenanthrene-9,10-dione' }, # From Wikipedia Phenanthrenequinone { smiles => 'C1=CC=NC=2C(C(C3=NC=CC=C3C12)=O)=O', iupac => '4,7-phenanthroline-5,6-dione' }, # From Wikipedia Phanquinone ); @cases = grep { !exists $_->{AUTHOR} } @cases unless $ENV{AUTHOR_TESTING}; plan skip_all => 'No available cases' unless @cases; plan tests => scalar @cases; for my $case (@cases) { my $ok; eval { $ok = is ChemOnomatopist::get_name( $case->{smiles} ), $case->{iupac}, $case->{smiles} }; $@ =~ s/\n$// if $@; fail $case->{smiles} . ": $@" if $@; diag 'test supposed to fail with AUTHOR_TESTING' if $case->{AUTHOR} && $ok; } ChemOnomatopist-0.6.1/t/43_porphyrins.t000066400000000000000000000012001452012116100177640ustar00rootroot00000000000000#!/usr/bin/perl use strict; use warnings; use ChemOnomatopist; use Test::More; my @cases = ( { smiles => 'C12=CC=C(N1)C=C1C=CC(=N1)C=C1C=CC(N1)=CC=1C=CC(N1)=C2', iupac => 'porphyrin' }, ); @cases = grep { !exists $_->{AUTHOR} } @cases unless $ENV{AUTHOR_TESTING}; plan skip_all => 'No available cases' unless @cases; plan tests => scalar @cases; for my $case (@cases) { my $ok; eval { $ok = is ChemOnomatopist::get_name( $case->{smiles} ), $case->{iupac}, $case->{smiles} }; $@ =~ s/\n$// if $@; fail $case->{smiles} . ": $@" if $@; diag 'test supposed to fail with AUTHOR_TESTING' if $case->{AUTHOR} && $ok; }