Chemistry-OpenSMILES-0.11.6/0000775000200400020040000000000014753573665015267 5ustar andriusandriusChemistry-OpenSMILES-0.11.6/dist.ini0000644000200400020040000000147014753573665016733 0ustar andriusandriusname = Chemistry-OpenSMILES author = Andrius Merkys license = BSD copyright_holder = Andrius Merkys copyright_year = 2020-2025 version = 0.11.6 [@Filter] -bundle = @Basic -remove = MakeMaker -remove = License [AutoMetaResources] homepage = https://search.cpan.org/dist/%{dist} repository.github = user:merkys bugtracker.github = user:merkys [MetaJSON] [MinimumPerlFast] [ModuleBuild] mb_class = Module::Build::Parse::Yapp build_element = yp [OurPkgVersion] [Prereqs] Chemistry::Elements = 0 Graph = 0.97 List::Util = 1.45 Parse::Yapp = 0 Set::Object = 0 [Prereqs / Build] -phase = build Module::Build::Parse::Yapp = 0.1.2 [Prereqs / Configure] -phase = configure Module::Build::Parse::Yapp = 0.1.2 [Prereqs / Test] -phase = test Algorithm::Combinatorics = 0 Data::Dumper = 0 Test::More = 0 Chemistry-OpenSMILES-0.11.6/LICENSE0000644000200400020040000000273314753573665016277 0ustar andriusandriusCopyright (c) The Regents of the University of California. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the University nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. Chemistry-OpenSMILES-0.11.6/Changes0000644000200400020040000001652314753573665016567 0ustar andriusandrius0.11.6 2025-02-14 - Restoring incompatibility with Perl Graph package versions before v0.9727 which have a bug in bridges(). - Limiting the set of allowed aromatic elements. 0.11.5 2025-02-13 - Fixing a bug failing to recognise @TB10. - Forbidding unknown chemical elements. - Forbidding ring bonds to self. - Accepting order subroutine in kekulise() in order to make the algorithm stable. - Supporting octahedral chirality in chirality_to_pseudograph(). - Introducing Chemistry::OpenSMILES::is_aromatic_bond(). - Reporting aromatic bonds outside aromatic rings in _validate(). 0.11.4 2025-02-04 - Reducing false-positive cis/trans conflicts detected in _validate(). - Fixing a bug causing allene systems to be validated more times than needed in _validate(). 0.11.3 2025-02-03 - Respecting and validating allene systems in _validate(). - Better validation of cis/trans and chirality settings in _validate(). 0.11.2 2025-01-16 - Fixing nondeterminism in relative cis/trans marker placement. 0.11.1 2025-01-14 - Recognising unimportant anomer chiral centers. 0.11.0 2025-01-06 - Fixing handling of octahedral chirality. - Implementing an alternative method to mark cis/trans settings by giving their list instead of providing a callback. 0.10.0 2024-11-28 - Reworking the logic of marking atoms with nonstandard valences. - Introducing Chemistry::OpenSMILES::valence(). - Changing the API of Chemistry::OpenSMILES::Writer::write_SMILES() to accept option hash as the second parameter. Backwards compatibility for old calls is retained. 0.9.2 2024-11-26 - Fix a pair of bugs in valence calculation. 0.9.1 2024-11-26 - Handling trigonal bipyramidal centers. - Exporting all Chemistry::OpenSMILES::is_chiral_*() subroutines. - Fixing a bug where H atom counts of 0 given in square brackets were not preserved. 0.9.0 2023-10-25 - Handling square planar chirality. 0.8.6 2023-06-19 - Implementing is_ring_atom(). - No longer removing chiral centers from atoms in rings. - Accepting coloring subroutine references in is_unimportant_double_bond() and mark_all_double_bonds(). - Returning false instead of undefined value. - Exporting %bond_order_to_symbol and %bond_symbol_to_order to simplify conversion between bond symbols and orders. 0.8.5 2023-01-26 - Fixing incorrect chirality handling code as it did not consider all ring bonds up to now. 0.8.4 2022-11-29 - Ceasing to mark unimportant double bonds in Chemistry::OpenSMILES::Stereo. 0.8.3 2022-11-03 - Exporting %Chemistry::OpenSMILES::normal_valence. 0.8.2 2022-09-20 - Adding Chemistry::OpenSMILES::Aromaticity and Chemistry::OpenSMILES::Stereo from smiles-scripts project v0.2.0. - Fixing prototypes of recently added functions. - Introducing Chemistry::OpenSMILES::is_double_bond(). - Introducing Chemistry::OpenSMILES::is_ring_bond(). 0.8.1 2022-05-11 - Fixing a bug with incomplete cis/trans markers on ring bonds. - Introducing Chemistry::OpenSMILES::toggle_cistrans(). - Introducing two functions for bond type determination. 0.8.0 2022-05-05 - Implementing support for tetrahedral chiral centers having lone pairs. - Safeguarding against infinite cycles in Chemistry::OpenSMILES::Writer::_permutation_order(). - Fixing mistake in Chemistry::OpenSMILES::Parser::parse() documentation. 0.7.0 2021-10-30 - Fixing important issue with representing tetrahedral chiral centers by introducing 'chirality_neighbours' key to atom hashes. 0.6.0 2021-10-18 - Allowing multi-digit hydrogen counts if enabled via parser option. - Standardizing chiral markers for allenal carbons. - Not standardizing chiral markers in raw output. - Reporting '@' and '@@' chiralities with other than 4 neighbours as they cannot be processed right now. 0.5.2 2021-10-11 - Creating explicitly refvertexed Graph objects to get around all the problems caused by interpreting them as scalar values. 0.5.1 2021-08-29 - Working around an incompatibility with Graph v0.9721. 0.5.0 2021-08-26 - Handling raw parsing output in Chemistry::OpenSMILES::clean_chiral_centers(). - Fixing a bug in position counting due to $1 pollution. - Detecting and reporting disconnected parts of moieties. - Introducing Chemistry::OpenSMILES::is_chiral() to detect both chiral atoms and moieties. - Fixing bug in Chemistry::OpenSMILES::clean_chiral_centers() which caused removal of all chiral centers, not just tetrahedral. - Introducing Chemistry::OpenSMILES::mirror() to invert chiral centers, currently only tetrahedral. - Fixing bug in printing of chiral centers, previously only tetrahedral centers were printed. 0.4.6 2021-04-14 - Fixing reading/writing of cis/trans ring bonds. 0.4.5 2021-03-25 - Fixing a bug in aromatic bond detection. 0.4.4 2021-03-09 - Introducing Chemistry::OpenSMILES::clean_chiral_centers() to remove unnecessary tetrahedral chiral settings. 0.4.3 2021-01-25 - Supporting tetrahedral chirality in Chemistry::OpenSMILES::Writer::write_SMILES(). - Chiralities @TH1 and @TH2 are the same as @ and @@, respectively. - Specifying the requirement of Graph v0.97 or later. - Fixing issue with homepage. 0.4.2 2021-01-20 - Exporting Chemistry::OpenSMILES::Writer::write_SMILES(). - Removing chirality information from written SMILES, as code to write this reliably is not yet available. - Fixing writing of cis/trans bonds. 0.4.1 2021-01-12 - Fixing an issue with 'scalar %hash' in t/01_atoms.t. - Listing a dependency on Graph::Traversal::DFS. 0.4.0 2021-01-11 - Adding Chemistry::OpenSMILES::Writer, implementing a SMILES writer for the data structures built by Chemistry::OpenSMILES::Parser. - Converting 'charge' atom field to integer. - Exporting Chemistry::OpenSMILES::is_aromatic(). 0.3.2 2020-10-05 - Explicitly setting bond order to ':' on bonds between aromatic atoms. 0.3.1 2020-09-17 - Establishing deterministic order while generating graph vertices for implicit hydrogen atoms. 0.3.0 2020-09-16 - Deriving counts of implicit hydrogen atoms. - Default number of attached hydrogen atoms for atoms in square brackets is 0. - Supporting bracketless '*' atom. 0.2.1 2020-08-13 - Adding 'number' atom field to store atom positions in the initial SMILES string. - Making 'class' atom field mandatory. - Making 'isotope' atom field an integer. 0.2.0 2020-05-20 - Fixing an issue with graph merging. - Unifying the representation of single bonds. - Extending POD documentation. 0.1.3 2020-05-19 - Detecting and reporting unbalanced parentheses. - Detecting and reporting unclosed ring bonds. - Fixing position pointers in error messages, removing line numbers. - Adding versioned dependency on Module::Build::Parse::Yapp v0.1.2. - Declaring minimum Perl version. - Fixing the POD. 0.1.2 2020-05-17 - Turning hydrogen counts into real vertices. If 'raw' option is present, hydrogen counts are left as integers instead. 0.1.1 2020-05-14 - Adding missing dependency on Graph::Undirected. - Adding POD. 0.1.0 2020-05-13 - Initial release. Chemistry-OpenSMILES-0.11.6/README0000644000200400020040000000050314753573665016143 0ustar andriusandriusThis archive contains the distribution Chemistry-OpenSMILES, version 0.11.6: OpenSMILES format reader and writer This software is Copyright (c) 2020-2025 by Andrius Merkys. This is free software, licensed under: The (three-clause) BSD License This README file was generated by Dist::Zilla::Plugin::Readme v6.012. Chemistry-OpenSMILES-0.11.6/Build.PL0000644000200400020040000000274214753573665016566 0ustar andriusandrius # This file was automatically generated by Dist::Zilla::Plugin::ModuleBuild v6.012. use strict; use warnings; use Module::Build 0.28; use lib qw{inc}; use Module::Build::Parse::Yapp; my %module_build_args = ( "build_requires" => { "Module::Build" => "0.28", "Module::Build::Parse::Yapp" => "v0.1.2" }, "configure_requires" => { "Module::Build" => "0.28", "Module::Build::Parse::Yapp" => "v0.1.2" }, "dist_abstract" => "OpenSMILES format reader and writer", "dist_author" => [ "Andrius Merkys " ], "dist_name" => "Chemistry-OpenSMILES", "dist_version" => "0.11.6", "license" => "bsd", "module_name" => "Chemistry::OpenSMILES", "recursive_test_files" => 1, "requires" => { "Chemistry::Elements" => 0, "Graph" => "0.97", "List::Util" => "1.45", "Parse::Yapp" => 0, "Set::Object" => 0, "perl" => "5.010000" }, "test_requires" => { "Algorithm::Combinatorics" => 0, "Data::Dumper" => 0, "Test::More" => 0 } ); my %fallback_build_requires = ( "Algorithm::Combinatorics" => 0, "Data::Dumper" => 0, "Module::Build" => "0.28", "Module::Build::Parse::Yapp" => "v0.1.2", "Test::More" => 0 ); unless ( eval { Module::Build->VERSION(0.4004) } ) { delete $module_build_args{test_requires}; $module_build_args{build_requires} = \%fallback_build_requires; } my $build = Module::Build::Parse::Yapp->new(%module_build_args); $build->add_build_element($_) for qw(yp); $build->create_build_script; Chemistry-OpenSMILES-0.11.6/MANIFEST0000644000200400020040000000224214753573665016416 0ustar andriusandrius# This file was automatically generated by Dist::Zilla::Plugin::Manifest v6.012. Build.PL Changes LICENSE MANIFEST META.json META.yml README dist.ini lib/Chemistry/OpenSMILES.pm lib/Chemistry/OpenSMILES/Aromaticity.pm lib/Chemistry/OpenSMILES/Parser.yp lib/Chemistry/OpenSMILES/Stereo.pm lib/Chemistry/OpenSMILES/Stereo/Tables.pm lib/Chemistry/OpenSMILES/Writer.pm t/01_atoms.t t/02_chains.t t/03_hcount.t t/04_errors.t t/05_orders.t t/06_write.t t/07_cistrans.t t/08_permutation_order.t t/09_tetrahedral_chirality.t t/10_validate.t t/11_validate_color.t t/12_tetrahedral_chirality.t t/13_clean_chiral_centers.t t/14_write_disconnected.t t/15_chirality.t t/16_unsupported_chirality.t t/17_max_hydrogen_count_digits.t t/18_chirality_reference.t t/19_lone_pairs.t t/20_aromaticity.t t/21_stereo.t t/22_rings.t t/23_clean_chiral_centers.t t/24_square_planar_chirality.t t/25_trigonal_bipyramidal_chirality.t t/26_octahedral_chirality.t t/27_valence.t t/28_octahedral_chirality.t t/29_trigonal_bipyramidal_chirality.t t/30_trigonal_bipyramidal_chirality.t t/31_write.t t/32_write.t t/33_allene_graph.t t/34_validate_allenes.t t/35_chirality_to_pseudograph.t t/36_kekulise.t t/37_elements.t Chemistry-OpenSMILES-0.11.6/META.yml0000644000200400020040000000173514753573665016544 0ustar andriusandrius--- abstract: 'OpenSMILES format reader and writer' author: - 'Andrius Merkys ' build_requires: Algorithm::Combinatorics: '0' Data::Dumper: '0' Module::Build: '0.28' Module::Build::Parse::Yapp: v0.1.2 Test::More: '0' configure_requires: Module::Build: '0.28' Module::Build::Parse::Yapp: v0.1.2 dynamic_config: 0 generated_by: 'Dist::Zilla version 6.012, CPAN::Meta::Converter version 2.150010' license: bsd meta-spec: url: http://module-build.sourceforge.net/META-spec-v1.4.html version: '1.4' name: Chemistry-OpenSMILES requires: Chemistry::Elements: '0' Graph: '0.97' List::Util: '1.45' Parse::Yapp: '0' Set::Object: '0' perl: '5.010000' resources: bugtracker: https://github.com/merkys/chemistry-opensmiles/issues homepage: https://search.cpan.org/dist/Chemistry-OpenSMILES repository: git://github.com/merkys/chemistry-opensmiles.git version: 0.11.6 x_generated_by_perl: v5.30.0 x_serialization_backend: 'YAML::Tiny version 1.73' Chemistry-OpenSMILES-0.11.6/t/0000775000200400020040000000000014753573665015532 5ustar andriusandriusChemistry-OpenSMILES-0.11.6/t/37_elements.t0000644000200400020040000000167514753573665020053 0ustar andriusandrius#!/usr/bin/perl use strict; use warnings; use Chemistry::OpenSMILES::Parser; use Test::More; my @known_elements = ( '*', 'Db', 'as', 'se' ); my @unknown_elements = ( 'D', 'Ha', 'M', 'T', 'X' ); my @unallowed_aromatic = ( 'al', 'si' ); plan tests => @known_elements + @unknown_elements + @unallowed_aromatic; for my $element (@known_elements) { my $parser = Chemistry::OpenSMILES::Parser->new; eval { $parser->parse( "[$element]" ) }; ok !$@, $element; } for my $element (@unknown_elements) { my $parser = Chemistry::OpenSMILES::Parser->new; eval { $parser->parse( "[$element]" ) }; $@ = '' unless $@; is $@, "chemical element with symbol '$element' is unknown\n", $element; } for my $element (@unallowed_aromatic) { my $parser = Chemistry::OpenSMILES::Parser->new; eval { $parser->parse( "[$element]" ) }; $@ = '' unless $@; is $@, "aromatic chemical element '$element' is not allowed\n", $element; } Chemistry-OpenSMILES-0.11.6/t/08_permutation_order.t0000644000200400020040000000152714753573665021773 0ustar andriusandrius#!/usr/bin/perl use strict; use warnings; use Chemistry::OpenSMILES::Writer; use Test::More; sub order { return join '', Chemistry::OpenSMILES::Writer::_permutation_order( @_ ); } my $random_cases = 20; my @bad_cases = ( [ 0..2 ], [ 0..4 ], [ 1..4 ], [ 0..2, undef ], [ 0, 1, 12, '' ], ); plan tests => $random_cases + (2 * scalar @bad_cases); for (1..$random_cases) { my @order = 0..3; for (0..9) { if( rand() < 0.5 ) { @order = ( @order[1..2], $order[0], $order[3] ); } else { @order = ( $order[0], @order[2..3], $order[1] ); } } is( order( @order ), '0123' ); } for (@bad_cases) { my $warning; local $SIG{__WARN__} = sub { $warning = $_[0] }; is( order( @$_ ), '0123' ); ok( defined $warning && $warning =~ /unexpected input received/ ); } Chemistry-OpenSMILES-0.11.6/t/15_chirality.t0000644000200400020040000000201014753573665020203 0ustar andriusandrius#!/usr/bin/perl use strict; use warnings; use Chemistry::OpenSMILES qw(is_chiral mirror); use Chemistry::OpenSMILES::Parser; use Chemistry::OpenSMILES::Writer qw(write_SMILES); use Test::More; my @cases = ( [ 'N[C@](Br)(O)C', 1, 1, 'N([C@@](Br)(O)(C))' ], [ 'N[C@@](Br)(O)C', 1, 1, 'N([C@](Br)(O)(C))' ], [ 'N(C)(Br)(O)C', 0, 0, 'N(C)(Br)(O)(C)' ], [ 'N(C)(Br)(O)C', 0, 0, 'N(C)(Br)(O)(C)' ], # Square brackets are retained as in raw mode the writer does not attempt to calculate the valence: [ 'N[C@AL1](Br)(O)C', 1, 0, 'N([C](Br)(O)(C))' ], ); plan tests => 3 * scalar @cases; for my $case (@cases) { my $parser; my $moiety; my $result; $parser = Chemistry::OpenSMILES::Parser->new; ( $moiety ) = $parser->parse( $case->[0], { raw => 1 } ); is is_chiral( $moiety ) + 0, $case->[1]; is Chemistry::OpenSMILES::is_chiral_tetrahedral( $moiety ) + 0, $case->[2]; mirror( $moiety ); is write_SMILES( $moiety, { raw => 1 } ), $case->[3]; } Chemistry-OpenSMILES-0.11.6/t/26_octahedral_chirality.t0000644000200400020040000000376514753573665022415 0ustar andriusandrius#!/usr/bin/perl use strict; use warnings; use Chemistry::OpenSMILES::Parser; use Chemistry::OpenSMILES::Stereo qw( chirality_to_pseudograph ); use Chemistry::OpenSMILES::Writer qw( write_SMILES ); use List::Util qw( first ); use Test::More; my @cases = ( [ 'C[Co@](F)(Cl)(Br)(I)S', [ qw( C Co F Cl Br I S ) ], 'C([Co@OH1](F)(Cl)(Br)(I)(S([H])))([H])([H])([H])' ], [ 'C[Co@](F)(Cl)(Br)(I)S', [ qw( F Co S I C Cl Br ) ], 'F([Co@OH2](S([H]))(I)(C([H])([H])([H]))(Cl)(Br))' ], [ 'S[Co@OH5](F)(I)(Cl)(C)Br', [ qw( Br Co C S Cl F I ) ], 'Br([Co@OH9](C([H])([H])([H]))(S([H]))(Cl)(F)(I))' ], [ 'Br[Co@OH12](Cl)(I)(F)(S)C', [ qw( Cl Co C Br F I S ) ], 'Cl([Co@OH15](C([H])([H])([H]))(Br)(F)(I)(S([H])))' ], [ 'Cl[Co@OH19](C)(I)(F)(S)Br', [ qw( I Co Cl Br F S C ) ], 'I([Co@OH27](Cl)(Br)(F)(S([H]))(C([H])([H])([H])))' ], ); eval 'use Graph::Nauty qw( are_isomorphic )'; my $has_Graph_Nauty = !$@; plan tests => @cases + $has_Graph_Nauty * @cases; for my $case (@cases) { my( $input, $order, $output ) = @$case; my $parser; my @moieties; my $result; my $order_sub = sub { my $vertices = shift; for my $symbol (@$order) { my $vertex = first { $_->{symbol} eq $symbol } values %$vertices; return $vertex if $vertex; } my( $vertex ) = values %$vertices; return $vertex; }; $parser = Chemistry::OpenSMILES::Parser->new; my( $input_moiety ) = $parser->parse( $input ); $result = write_SMILES( [ $input_moiety ], { order_sub => $order_sub } ); is $result, $output, $input; next unless $has_Graph_Nauty; my( $output_moiety ) = $parser->parse( $output ); for ( $input_moiety, $output_moiety ) { chirality_to_pseudograph( $_ ); } ok are_isomorphic( $input_moiety, $output_moiety, \&depict ); } sub depict { my( $vertex ) = @_; return '' unless exists $vertex->{symbol}; $vertex = { %$vertex }; delete $vertex->{chirality}; return write_SMILES( $vertex ); } Chemistry-OpenSMILES-0.11.6/t/34_validate_allenes.t0000644000200400020040000000224614753573665021523 0ustar andriusandrius#!/usr/bin/perl use strict; use warnings; use Chemistry::OpenSMILES; use Chemistry::OpenSMILES::Parser; use Test::More; my %cases = ( 'NC(Br)=[C@]=C(O)C' => undef, 'CC(C)=[C@]=C(C)C' => 'tetrahedral chiral allenal setting for C(3) is not needed as not all 4 neighbours are distinct', 'CC(C)=C=[C@]=C=C(C)C' => 'tetrahedral chiral allenal setting for C(4) is not needed as not all 4 neighbours are distinct', 'C/C(C)=C=[C@]=C(C)/C' => 'tetrahedral chiral allenal setting for C(4) observed for an atom which is not a center of an allenal system', 'F/C=C=C=C/F' => undef, 'F/C=C=C=CF' => 'allene system between atoms C(1) and C(4) has only one cis/trans marker', 'FC=C=C=CF' => 'allene system between atoms C(1) and C(4) has 4 neighbours, but does not have cis/trans setting', ); plan tests => scalar keys %cases; for (sort keys %cases) { my $warning; local $SIG{__WARN__} = sub { $warning = $_[0] }; my $parser = Chemistry::OpenSMILES::Parser->new; my( $graph ) = $parser->parse( $_ ); Chemistry::OpenSMILES::_validate( $graph, sub { $_[0]->{symbol} } ); $warning =~ s/\n$// if defined $warning; is $warning, $cases{$_}, $_; } Chemistry-OpenSMILES-0.11.6/t/09_tetrahedral_chirality.t0000644000200400020040000000466014753573665022602 0ustar andriusandrius#!/usr/bin/perl use strict; use warnings; use Chemistry::OpenSMILES qw( mirror ); use Chemistry::OpenSMILES::Parser; use Chemistry::OpenSMILES::Stereo qw( chirality_to_pseudograph ); use Chemistry::OpenSMILES::Writer qw( write_SMILES ); use Data::Dumper; use Test::More; my @cases = ( [ 'N[C@](Br)(O)C', 'N([C@](Br)(O)(C))', 'C([C@](O)(Br)(N))' ], [ 'Br[C@](O)(N)C', 'Br([C@](O)(N)(C))', 'C([C@](N)(O)(Br))' ], [ 'O[C@](Br)(C)N', 'O([C@](Br)(C)(N))', 'N([C@](C)(Br)(O))' ], [ 'Br[C@](C)(O)N', 'Br([C@](C)(O)(N))', 'N([C@](O)(C)(Br))' ], [ 'C[C@](Br)(N)O', 'C([C@](Br)(N)(O))', 'O([C@](N)(Br)(C))' ], [ 'Br[C@](N)(C)O', 'Br([C@](N)(C)(O))', 'O([C@](C)(N)(Br))' ], [ 'C[C@@](Br)(O)N', 'C([C@@](Br)(O)(N))', 'N([C@@](O)(Br)(C))' ], [ 'Br[C@@](N)(O)C', 'Br([C@@](N)(O)(C))', 'C([C@@](O)(N)(Br))' ], [ '[C@@](C)(Br)(O)N', '[C@@](C)(Br)(O)(N)', 'N([C@@](O)(Br)(C))' ], [ '[C@@](Br)(N)(O)C', '[C@@](Br)(N)(O)(C)', 'C([C@@](O)(N)(Br))' ], [ 'C1OCC[C@]1(Cl)Br', 'C1(O(C(C([C@]1(Cl)(Br)))))', 'Br([C@@]1(Cl)(C(C(O(C1)))))' ], ); eval 'use Graph::Nauty qw( are_isomorphic )'; my $has_Graph_Nauty = !$@; plan tests => 2 * @cases + $has_Graph_Nauty * 3 * @cases; for my $case (@cases) { my $parser; my @moieties; my $result; $parser = Chemistry::OpenSMILES::Parser->new; @moieties = $parser->parse( $case->[0], { raw => 1 } ); $result = write_SMILES( \@moieties, { raw => 1 } ); is $result, $case->[1]; $result = write_SMILES( \@moieties, { raw => 1, order_sub => \&reverse_order } ); is $result, $case->[2]; next unless $has_Graph_Nauty; # Ensuring the SMILES representations describe isomorphic graphs my @graphs = map { $parser->parse( $_ ) } @$case, $case->[0]; mirror $graphs[3]; for (@graphs) { chirality_to_pseudograph( $_ ); } ok are_isomorphic( $graphs[0], $graphs[1], \&depict ); ok are_isomorphic( $graphs[1], $graphs[2], \&depict ); ok !are_isomorphic( $graphs[0], $graphs[3], \&depict ); } sub depict { my( $vertex ) = @_; if( ref $vertex eq 'HASH' && exists $vertex->{symbol} ) { $vertex = { %$vertex }; delete $vertex->{chirality}; return write_SMILES( $vertex ); } return Dumper $vertex; } sub reverse_order { my( $vertices ) = @_; my @sorted = sort { $vertices->{$b}{number} <=> $vertices->{$a}{number} } keys %$vertices; return $vertices->{shift @sorted}; } Chemistry-OpenSMILES-0.11.6/t/17_max_hydrogen_count_digits.t0000644000200400020040000000056214753573665023466 0ustar andriusandrius#!/usr/bin/perl use strict; use warnings; use Chemistry::OpenSMILES::Parser; use Test::More; my %cases = ( '[YbH12+3]' => 13, ); plan tests => scalar keys %cases; for (sort keys %cases) { my $parser = Chemistry::OpenSMILES::Parser->new; my( $graph ) = $parser->parse( $_, { max_hydrogen_count_digits => 2 } ); is( $graph->vertices, $cases{$_} ); } Chemistry-OpenSMILES-0.11.6/t/25_trigonal_bipyramidal_chirality.t0000644000200400020040000000455514753573665024500 0ustar andriusandrius#!/usr/bin/perl use strict; use warnings; use Chemistry::OpenSMILES qw( mirror ); use Chemistry::OpenSMILES::Parser; use Chemistry::OpenSMILES::Stereo qw( chirality_to_pseudograph ); use Chemistry::OpenSMILES::Writer qw( write_SMILES ); use Data::Dumper; use List::Util qw( first ); use Test::More; my @cases = ( # Tests from OpenSMILES specification [ 'S[As@TB1](F)(Cl)(Br)N', [ qw( S As Br Cl F N ) ], 'S([As@TB2](Br)(Cl)(F)(N))' ], [ 'S[As@TB5](F)(N)(Cl)Br', [ qw( F As S Cl N Br ) ], 'F([As@TB10](S)(Cl)(N)(Br))' ], [ 'F[As@TB15](Cl)(S)(Br)N', [ qw( Br As Cl S F N ) ], 'Br([As@TB20](Cl)(S)(F)(N))' ], # Local tests [ 'S[As@TB20](F)(Cl)(Br)N', [ qw( S As F Cl Br N ) ], 'S([As@TB20](F)(Cl)(Br)(N))' ], [ 'S[As@TB20](F)(Cl)(Br)N', [ qw( S As Br Cl F N ) ], 'S([As@TB15](Br)(Cl)(F)(N))' ], [ 'S[As@TB20](F)(Cl)(Br)N', [ qw( S As Br N F Cl ) ], 'S([As@TB20](Br)(N)(F)(Cl))' ], [ 'S[As@TB20](F)(Cl)(Br)N', [ qw( S As F N Br Cl ) ], 'S([As@TB15](F)(N)(Br)(Cl))' ], ); eval 'use Graph::Nauty qw( are_isomorphic )'; my $has_Graph_Nauty = !$@; plan tests => @cases + $has_Graph_Nauty * 2 * @cases; for my $case (@cases) { my $parser; my @moieties; my $result; my $order_sub = sub { my( $vertices ) = @_; for my $symbol (@{$case->[1]}) { my $vertex = first { $_->{symbol} eq $symbol } values %$vertices; return $vertex if $vertex; } return values %$vertices; }; $parser = Chemistry::OpenSMILES::Parser->new; @moieties = $parser->parse( $case->[0], { raw => 1 } ); $result = write_SMILES( \@moieties, { raw => 1, order_sub => $order_sub } ); is $result, $case->[2]; next unless $has_Graph_Nauty; # Ensuring the SMILES representations describe isomorphic graphs my @graphs = map { $parser->parse( $_ ) } $case->[0], $case->[2], $case->[0]; mirror $graphs[2]; for (@graphs) { chirality_to_pseudograph( $_ ); } ok are_isomorphic( $graphs[0], $graphs[1], \&depict ), $case->[0] . ' <=> ' . $case->[2]; ok !are_isomorphic( $graphs[0], $graphs[2], \&depict ), $case->[0] . ' <=> ' . $case->[0]; } sub depict { my( $vertex ) = @_; if( ref $vertex eq 'HASH' && exists $vertex->{symbol} ) { $vertex = { %$vertex }; delete $vertex->{chirality}; return write_SMILES( $vertex ); } return Dumper $vertex; } Chemistry-OpenSMILES-0.11.6/t/21_stereo.t0000644000200400020040000000562014753573665017523 0ustar andriusandrius#!/usr/bin/perl use strict; use warnings; use Chemistry::OpenSMILES qw( is_cis_trans_bond ); use Chemistry::OpenSMILES::Parser; use Chemistry::OpenSMILES::Stereo qw( chirality_to_pseudograph cis_trans_to_pseudoedges mark_all_double_bonds ); use Test::More; my @cases = ( [ 'C/C=C/C', 12, 15, 12, 11, 4 ], [ 'F/N=C(/F)\F', 5, 4, 5, 4, 0 ], ); plan tests => 5 * @cases + 2; my $parser = Chemistry::OpenSMILES::Parser->new; my $moiety; ( $moiety ) = $parser->parse( 'N[C@](Br)(O)C' ); chirality_to_pseudograph( $moiety ); is( $moiety->vertices, 23 ); is( $moiety->edges, 70 ); for my $case (@cases) { my( $smiles, $vertices_with_pseudo, $edges_with_pseudo, $vertices_when_marked, $edges_when_marked, $cis_trans_bonds ) = @$case; my( $moiety ) = $parser->parse( $smiles ); # copy() makes a shallow copy without edge attributes, thus they # have to be added later: my $copy = $moiety->copy; for my $bond ($moiety->edges) { next unless $moiety->has_edge_attribute( @$bond, 'bond' ); $copy->set_edge_attribute( @$bond, 'bond', $moiety->get_edge_attribute( @$bond, 'bond' ) ); } cis_trans_to_pseudoedges( $copy ); is( $copy->vertices, $vertices_with_pseudo ); is( $copy->edges, $edges_with_pseudo ); # Drop cis/trans markers from the input graph and mark them # anew. for my $bond ($moiety->edges) { next unless is_cis_trans_bond( $moiety, @$bond ); $moiety->delete_edge_attribute( @$bond, 'bond' ); } mark_all_double_bonds( $moiety, sub { if( $copy->has_edge( $_[0], $_[3] ) && $copy->has_edge_attribute( $_[0], $_[3], 'pseudo' ) ) { return $copy->get_edge_attribute( $_[0], $_[3], 'pseudo' ); } } ); is( $moiety->vertices, $vertices_when_marked ); is( $moiety->edges, $edges_when_marked ); is( scalar( grep { is_cis_trans_bond( $moiety, @$_ ) } $moiety->edges ), $cis_trans_bonds ); } # The following test must not throw any warnings ( $moiety ) = $parser->parse( 'C=C=C=C' ); # copy() makes a shallow copy without edge attributes, thus they # have to be added later: my $copy = $moiety->copy; for my $bond ($moiety->edges) { next unless $moiety->has_edge_attribute( @$bond, 'bond' ); $copy->set_edge_attribute( @$bond, 'bond', $moiety->get_edge_attribute( @$bond, 'bond' ) ); } cis_trans_to_pseudoedges( $copy ); # Drop cis/trans markers from the input graph and mark them # anew. for my $bond ($moiety->edges) { next unless is_cis_trans_bond( $moiety, @$bond ); $moiety->delete_edge_attribute( @$bond, 'bond' ); } mark_all_double_bonds( $moiety, [] ); Chemistry-OpenSMILES-0.11.6/t/36_kekulise.t0000644000200400020040000000211614753573665020041 0ustar andriusandrius#!/usr/bin/perl use strict; use warnings; use Chemistry::OpenSMILES qw(is_single_bond); use Chemistry::OpenSMILES::Aromaticity qw(kekulise); use Chemistry::OpenSMILES::Parser; use Chemistry::OpenSMILES::Writer qw(write_SMILES); use List::Util qw(max); use Test::More; eval 'use Graph::Nauty qw(canonical_order)'; plan skip_all => 'no Graph::Nauty' if $@; my $repeats = 10; plan tests => 2 * $repeats; for my $reverse (('', 1) x $repeats) { my $parser = Chemistry::OpenSMILES::Parser->new; my( $moiety ) = $parser->parse( 'Cc1c(C)cccc1' ); if( $reverse ) { # Reverse the atom order if requested my $max = max map { $_->{number} } $moiety->vertices; for my $atom ($moiety->vertices) { $atom->{number} = $max - $atom->{number}; } } my @order = canonical_order( $moiety, \&write_SMILES ); my %order; for my $i (0..$#order) { $order{$order[$i]} = $i; } my $order_sub = sub { $order{$_[0]} }; kekulise( $moiety, $order_sub ); ok is_single_bond( $moiety, grep { $moiety->degree($_) == 3 } $moiety->vertices ); } Chemistry-OpenSMILES-0.11.6/t/05_orders.t0000644000200400020040000000215014753573665017515 0ustar andriusandrius#!/usr/bin/perl use strict; use warnings; use Chemistry::OpenSMILES::Parser; use Test::More; my %cases = ( 'CCC' => {}, 'C-C' => {}, 'C=C' => { '=' => 1 }, 'C-1C1' => {}, 'C=1C1' => { '=' => 1 }, 'C(=O)' => { '=' => 1 }, 'C(C=C)' => { '=' => 1 }, 'cc' => { ':' => 1 }, 'c:c' => { ':' => 1 }, 'c(c)' => { ':' => 1 }, 'c1ccc1' => { ':' => 4 }, 'c-c' => {}, 'c(-c)' => {}, 'c1cc-1' => { ':' => 2 }, 'c-1cc1' => { ':' => 2 }, ); plan tests => 2 * scalar keys %cases; for my $case (sort keys %cases) { my $parser = Chemistry::OpenSMILES::Parser->new; my @graphs = $parser->parse( $case ); is( scalar @graphs, 1 ); my $graph = shift @graphs; my %orders; for ($graph->edges) { next if !$graph->has_edge_attribute( @$_, 'bond' ); $orders{ $graph->get_edge_attribute( @$_, 'bond' )} ++; } is( serialize( \%orders ), serialize( $cases{$case} ) ); } sub serialize { my( $orders ) = @_; return join ' ', map { $_ . '(' . $orders->{$_} . ')' } sort keys %$orders; } Chemistry-OpenSMILES-0.11.6/t/13_clean_chiral_centers.t0000644000200400020040000000116214753573665022347 0ustar andriusandrius#!/usr/bin/perl use strict; use warnings; use Chemistry::OpenSMILES qw(clean_chiral_centers); use Chemistry::OpenSMILES::Parser; use Chemistry::OpenSMILES::Writer qw(write_SMILES); use Test::More; my %cases = ( 'C[C@H2]O' => 1, 'C[C@H]([H])O' => 1, 'C[C@H]([S])O' => 0, ); plan tests => 2 * scalar keys %cases; for my $case (sort keys %cases) { my $parser = Chemistry::OpenSMILES::Parser->new; my @moieties = $parser->parse( $case, { raw => 1 } ); is scalar @moieties, 1; is scalar clean_chiral_centers( $moieties[0], sub { write_SMILES( $_[0], { raw => 1 } ) } ), $cases{$case}; } Chemistry-OpenSMILES-0.11.6/t/33_allene_graph.t0000644000200400020040000000221214753573665020640 0ustar andriusandrius#!/usr/bin/perl use strict; use warnings; use Chemistry::OpenSMILES; use Chemistry::OpenSMILES::Parser; use Test::More; my @cases = ( # OpenSMILES specification v1.0 [ 'F/C=C=C=C/F', '4' ], [ 'F/C=C=C=C\F', '4' ], [ 'NC(Br)=[C@]=C(O)C', '3' ], ); plan tests => 3 * scalar @cases; for (@cases) { my( $smiles, $result ) = @$_; my $parser = Chemistry::OpenSMILES::Parser->new; my( $graph ) = $parser->parse( $smiles ); my $allenes = Chemistry::OpenSMILES::_allene_graph( $graph ); is join( ',', map { scalar @$_ } $allenes->connected_components ), $result; my $end_edges = grep { $allenes->has_edge_attribute( @$_, 'allene' ) && $allenes->get_edge_attribute( @$_, 'allene' ) eq 'end' } $allenes->edges; my $mid_edges = grep { $allenes->has_edge_attribute( @$_, 'allene' ) && $allenes->get_edge_attribute( @$_, 'allene' ) eq 'mid' } $allenes->edges; is $end_edges, scalar $allenes->connected_components, 'end edges'; is $mid_edges, 2 * (grep { @$_ % 2 } $allenes->connected_components), 'mid edges'; } Chemistry-OpenSMILES-0.11.6/t/04_errors.t0000644000200400020040000000307114753573665017535 0ustar andriusandrius#!/usr/bin/perl use strict; use warnings; use Chemistry::OpenSMILES::Parser; use Test::More; my %cases = ( 'C(CC' => 't/04_errors.t: syntax error: missing closing parenthesis.', 'CC)C' => 't/04_errors.t: syntax error: unbalanced parentheses.', 'CCC)' => 't/04_errors.t: syntax error: unbalanced parentheses.', 'C#=O' => 't/04_errors.t: syntax error at position 3: \'O\'.', 'C..O' => 't/04_errors.t: syntax error at position 3: \'O\'.', 'CCC1' => 't/04_errors.t: unclosed ring bond(s) detected: 1.', 'C2%12' => 't/04_errors.t: unclosed ring bond(s) detected: 2, 12.', 'C=1CC$1' => 't/04_errors.t: ring bond types for ring bond 1 do not match.', 'C((C))O' => 't/04_errors.t: syntax error at position 3: \'C))O\'.', '(O)' => 't/04_errors.t: syntax error at position 1: \'O)\'.', '(O)C' => 't/04_errors.t: syntax error at position 1: \'O)C\'.', '.CC' => 't/04_errors.t: syntax error at position 1: \'CC\'.', 'CC.' => 't/04_errors.t: syntax error at position 4.', # The following SMILES are all strange and should at least be warned about: 'C11 ' => 'atom cannot be bonded to itself', 'C1C1' => undef, 'C12CCCCC12' => undef, 'C[C@]C' => undef, ); plan tests => scalar keys %cases; for (sort keys %cases) { 'intentionally polluting $1' =~ /(\S+)/; my $error; eval { my $parser = Chemistry::OpenSMILES::Parser->new; my @graphs = $parser->parse( $_ ); }; $error = $@ if $@; $error =~ s/\n$// if $error; is $error, $cases{$_}; } Chemistry-OpenSMILES-0.11.6/t/22_rings.t0000644000200400020040000000152414753573665017344 0ustar andriusandrius#!/usr/bin/perl use strict; use warnings; use Chemistry::OpenSMILES qw( is_ring_atom is_ring_bond ); use Chemistry::OpenSMILES::Parser; use Test::More; my %cases = ( 'CN1C=NC2=C1C(=O)N(C(=O)N2C)C' => { atoms => 9, bonds => 10 }, ); plan tests => 4 * scalar keys %cases; for my $case (sort keys %cases) { my $parser = Chemistry::OpenSMILES::Parser->new; my( $molecule ) = $parser->parse( $case ); is scalar( grep { is_ring_atom( $molecule, $_ ) } $molecule->vertices ), $cases{$case}->{atoms}; is scalar( grep { is_ring_bond( $molecule, @$_ ) } $molecule->edges ), $cases{$case}->{bonds}; is scalar( grep { is_ring_atom( $molecule, $_, -1 ) } $molecule->vertices ), $cases{$case}->{atoms}; is scalar( grep { is_ring_bond( $molecule, @$_, -1 ) } $molecule->edges ), $cases{$case}->{bonds}; } Chemistry-OpenSMILES-0.11.6/t/35_chirality_to_pseudograph.t0000644000200400020040000000274214753573665023324 0ustar andriusandrius#!/usr/bin/perl use strict; use warnings; use Chemistry::OpenSMILES qw(clean_chiral_centers); use Chemistry::OpenSMILES::Parser; use Chemistry::OpenSMILES::Stereo qw(chirality_to_pseudograph); use Chemistry::OpenSMILES::Writer qw(write_SMILES); use Test::More; eval 'use Graph::Nauty qw(orbits)'; plan skip_all => 'no Graph::Nauty' if $@; sub depict { ref $_[0] && exists $_[0]->{symbol} ? &write_SMILES : '' } my @cases = ( [ '[C@H4]', 'C,HHHH' ], [ '[C@H3][C@H3]', 'CC,HHHHHH' ], [ '[P@@](C(C)C)(C(C)C)(C(C)C)N', 'CCC,CCCCCC,HH,HHH,HHHHHHHHHHHHHHHHHH,N,P' ], [ '[P@@]([C@@H](C)C)([C@@H](C)C)([C@@H](C)C)N', 'CCC,CCC,CCC,HH,HHH,HHHHHHHHH,HHHHHHHHH,N,P' ], [ '[C@TB1]([H])([H])([H])([H])([H])', 'C,HH,HHH' ], [ '[C@TB1](F)([H])([H])([H])([H])', 'C,F,H,HHH' ], [ '[C@TB1](F)([H])([H])([H])(F)', 'C,FF,HHH' ], [ '[C@OH1]([H])([H])([H])([H])([H])([H])', 'C,HHHHHH' ], [ '[C@OH1]([H])([H])([H])([H])([H])F', 'C,F,H,HHHH' ], ); plan tests => scalar @cases; for my $case (@cases) { my( $smiles, $orbits_test ) = @$case; my $parser = Chemistry::OpenSMILES::Parser->new; my( $moiety ) = $parser->parse( $smiles ); my $copy = $moiety->copy; chirality_to_pseudograph( $copy ); my $orbits_result = join ',', sort map { join '', map { $_->{symbol} } @$_ } grep { exists $_->[0]{symbol} } orbits( $copy, \&depict ); is $orbits_result, $orbits_test, $smiles; } Chemistry-OpenSMILES-0.11.6/t/32_write.t0000644000200400020040000000211614753573665017353 0ustar andriusandrius#!/usr/bin/perl use strict; use warnings; use Chemistry::OpenSMILES::Parser; use Chemistry::OpenSMILES::Writer qw(write_SMILES); use Test::More; my @cases = ( [ '[C]1[C]/C=N/[C][C][C][C][C]1', '[C]1([C](/C(=N(/[C]([C]([C]([C]([C]1))))))([H])))' ], [ '[C:6]1[C:7]/C=N/[C:1][C:2][C:3][C:4][C:5]1', '[C:1]\1([C:2]([C:3]([C:4]([C:5]([C:6]([C:7](/C(=N/1)([H]))))))))' ], ); plan tests => scalar @cases; for my $case (@cases) { my $parser; my @moieties; my $result; $parser = Chemistry::OpenSMILES::Parser->new; @moieties = $parser->parse( $case->[0] ); $result = write_SMILES( \@moieties, { order_sub => \&class_order } ); is $result, $case->[1]; } sub class_order { my $vertices = shift; my @classed = grep { $vertices->{$_}{class} } keys %$vertices; my @classless = grep { !$vertices->{$_}{class} } keys %$vertices; my @sorted = ( (sort { $vertices->{$a}{class} <=> $vertices->{$b}{class} } @classed), (sort { $vertices->{$a}{number} <=> $vertices->{$b}{number} } @classless) ); return $vertices->{shift @sorted}; } Chemistry-OpenSMILES-0.11.6/t/20_aromaticity.t0000644000200400020040000000176314753573665020552 0ustar andriusandrius#!/usr/bin/perl use strict; use warnings; use Chemistry::OpenSMILES::Parser; use Chemistry::OpenSMILES::Aromaticity qw( aromatise kekulise ); use Chemistry::OpenSMILES::Writer qw( write_SMILES ); use Test::More; my @cases = ( [ 'C1=CC=CC=C1', 'c:1(:c(:c(:c(:c(:c:1([H]))([H]))([H]))([H]))([H]))([H])', 'C=1(C(=C(C(=C(C=1([H]))([H]))([H]))([H]))([H]))([H])' ], [ 'C1=CC=CC=C1C1=CC=CC=C1', 'c:1(:c(:c(:c(:c(:c:1(-c:1(:c(:c(:c(:c(:c:1([H]))([H]))([H]))([H]))([H]))))([H]))([H]))([H]))([H]))([H])', 'C=1(C(=C(C(=C(C=1(C=1(C(=C(C(=C(C=1([H]))([H]))([H]))([H]))([H]))))([H]))([H]))([H]))([H]))([H])' ], ); plan tests => 2 * scalar @cases; for my $case (@cases) { my $result; my $parser = Chemistry::OpenSMILES::Parser->new; my( $moiety ) = $parser->parse( $case->[0] ); aromatise( $moiety ); $result = write_SMILES( [ $moiety ] ); is $result, $case->[1]; kekulise( $moiety ); $result = write_SMILES( [ $moiety ] ); is $result, $case->[2]; } Chemistry-OpenSMILES-0.11.6/t/14_write_disconnected.t0000644000200400020040000000074314753573665022101 0ustar andriusandrius#!/usr/bin/perl use strict; use warnings; use Chemistry::OpenSMILES::Writer qw(write_SMILES); use Graph::Undirected; use Test::More; plan tests => 2; my $graph = Graph::Undirected->new( refvertexed => 1 ); $graph->add_vertex( { symbol => 'C', number => 1 } ); $graph->add_vertex( { symbol => 'O', number => 2 } ); my $warning; local $SIG{__WARN__} = sub { $warning = $_[0] }; is write_SMILES( [ $graph ] ), '[C]'; is $warning, '1 unreachable atom(s) detected in moiety' . "\n"; Chemistry-OpenSMILES-0.11.6/t/28_octahedral_chirality.t0000644000200400020040000000131614753573665022405 0ustar andriusandrius#!/usr/bin/perl use strict; use warnings; use Algorithm::Combinatorics qw( permutations ); use Chemistry::OpenSMILES::Writer; use Test::More; my @order_permutations = permutations( [ 0..5 ] ); plan tests => @order_permutations * 30; for my $permutation (@order_permutations) { for (1..30) { my $chirality = Chemistry::OpenSMILES::Writer::_octahedral_chirality( @$permutation, '@OH' . $_ ); my @reverse_permutation = reverse_permutation( @$permutation ); is Chemistry::OpenSMILES::Writer::_octahedral_chirality( @reverse_permutation, $chirality ), '@OH' . $_; } } sub reverse_permutation { my @order = @_; return sort { $order[$a] <=> $order[$b] } 0..$#order; } Chemistry-OpenSMILES-0.11.6/t/18_chirality_reference.t0000644000200400020040000000154114753573665022234 0ustar andriusandrius#!/usr/bin/perl use strict; use warnings; use Chemistry::OpenSMILES qw( is_chiral ); use Chemistry::OpenSMILES::Parser; use Test::More; my %cases = ( 'C[C@@](Br)(O)N' => 0, 'Br[C@@](N)(O)C' => 0, '[C@@](C)(Br)(O)N' => 1, '[C@@](Br)(N)(O)C' => 1, 'FC1C[C@](Br)(Cl)CCC1' => 2, '[C@]1(Br)(Cl)CCCC(F)C1' => 8, 'C1.[C@]1(Br)(Cl)O' => 0, 'C(CCCC1)[C@]1(Br)(Cl)' => 0, 'C([C@](Br)(Cl)O)C' => 0, ); plan tests => 3 * scalar keys %cases; for (sort keys %cases) { my $parser = Chemistry::OpenSMILES::Parser->new; my( $graph ) = $parser->parse( $_ ); my( $chiral_center ) = grep { is_chiral $_ } $graph->vertices; ok( defined $chiral_center ); ok( exists $chiral_center->{chirality_neighbours} ); is( $chiral_center->{chirality_neighbours}[0]{number}, $cases{$_} ); } Chemistry-OpenSMILES-0.11.6/t/30_trigonal_bipyramidal_chirality.t0000644000200400020040000000054714753573665024471 0ustar andriusandrius#!/usr/bin/perl use strict; use warnings; use Chemistry::OpenSMILES::Writer; use Test::More; my @cases = ( [ qw( 0 1 2 3 4 @TB1 @TB1 ) ], [ qw( 0 1 3 2 4 @TB1 @TB2 ) ], ); plan tests => scalar @cases; for my $case (@cases) { my $result = pop @$case; is Chemistry::OpenSMILES::Writer::_trigonal_bipyramidal_chirality( @$case ), $result; } Chemistry-OpenSMILES-0.11.6/t/24_square_planar_chirality.t0000644000200400020040000000340414753573665023130 0ustar andriusandrius#!/usr/bin/perl use strict; use warnings; use Chemistry::OpenSMILES::Parser; use Chemistry::OpenSMILES::Stereo qw( chirality_to_pseudograph ); use Chemistry::OpenSMILES::Writer qw( write_SMILES ); use Data::Dumper; use Test::More; my @cases = ( [ 'N[C@SP1](Br)(O)C', 'N([C@SP1](Br)(O)(C))', 'C([C@SP1](O)(Br)(N))' ], [ 'N[C@SP2](Br)(O)C', 'N([C@SP2](Br)(O)(C))', 'C([C@SP2](O)(Br)(N))' ], [ 'N[C@SP3](Br)(O)C', 'N([C@SP3](Br)(O)(C))', 'C([C@SP3](O)(Br)(N))' ], ); eval 'use Graph::Nauty qw( are_isomorphic )'; my $has_Graph_Nauty = !$@; plan tests => 2 * @cases + $has_Graph_Nauty * 2 * @cases; for my $case (@cases) { my $parser; my @moieties; my $result; $parser = Chemistry::OpenSMILES::Parser->new; @moieties = $parser->parse( $case->[0], { raw => 1 } ); $result = write_SMILES( \@moieties, { raw => 1 } ); is $result, $case->[1]; $result = write_SMILES( \@moieties, { raw => 1, order_sub => \&reverse_order } ); is $result, $case->[2]; next unless $has_Graph_Nauty; # Ensuring the SMILES representations describe isomorphic graphs my @graphs = map { $parser->parse( $_ ) } @$case; for (@graphs) { chirality_to_pseudograph( $_ ); } ok are_isomorphic( $graphs[0], $graphs[1], \&depict ); ok are_isomorphic( $graphs[1], $graphs[2], \&depict ); } sub depict { my( $vertex ) = @_; if( ref $vertex eq 'HASH' && exists $vertex->{symbol} ) { $vertex = { %$vertex }; delete $vertex->{chirality}; return write_SMILES( $vertex ); } return Dumper $vertex; } sub reverse_order { my( $vertices ) = @_; my @sorted = sort { $vertices->{$b}{number} <=> $vertices->{$a}{number} } keys %$vertices; return $vertices->{shift @sorted}; } Chemistry-OpenSMILES-0.11.6/t/01_atoms.t0000644000200400020040000000162114753573665017340 0ustar andriusandrius#!/usr/bin/perl use strict; use warnings; use Chemistry::OpenSMILES::Parser; use Chemistry::OpenSMILES::Writer qw( write_SMILES ); use Test::More; my @cases = qw( C N Cl * [U] [Pb] [He] [CH4] [ClH] [Cl-] [Cu+2] [13CH4] [2H+] [238U] ); my %cases = ( '[*]' => '[*]', '[ClH1]' => '[ClH]', '[Cu++]' => '[Cu+2]', '[OH-1]' => '[OH-]', '[OH1-]' => '[OH-]', # '[C@TH1]' => '[C@TH1]', # These no longer make any sense even in raw parsing # '[C@TH2]' => '[C@TH2]', # These no longer make any sense even in raw parsing '[C]' => '[C]', map { $_ => $_ } @cases, ); plan tests => 2 * scalar keys %cases; for (sort keys %cases) { my $parser = Chemistry::OpenSMILES::Parser->new; my( $graph ) = $parser->parse( $_, { raw => 1 } ); is $graph->vertices, 1; is write_SMILES( $graph, { raw => 1 } ), $cases{$_}; } Chemistry-OpenSMILES-0.11.6/t/19_lone_pairs.t0000644000200400020040000000233414753573665020363 0ustar andriusandrius#!/usr/bin/perl use strict; use warnings; use Chemistry::OpenSMILES::Parser; use Chemistry::OpenSMILES::Writer qw(write_SMILES); use Test::More; my @cases = ( [ '[C@](C)(N)(O)', '[C@](C)(N)(O)', 'O([C@](N)(C))' ], # Same as before, inverting enumeration direction: [ '[C@@](C)(N)(O)', '[C@@](C)(N)(O)', 'O([C@@](N)(C))' ], [ 'C[C@](O)(N)', 'C([C@](O)(N))', 'N([C@@](O)(C))' ], # Same as before, inverting enumeration direction: [ 'C[C@@](O)(N)', 'C([C@@](O)(N))', 'N([C@](O)(C))' ], ); plan tests => 2 * scalar @cases; for my $case (@cases) { my $parser; my @moieties; my $result; $parser = Chemistry::OpenSMILES::Parser->new; @moieties = $parser->parse( $case->[0] ); $result = write_SMILES( \@moieties ); is( drop_H( $result ), $case->[1] ); $result = write_SMILES( \@moieties, \&reverse_order ); is( drop_H( $result ), $case->[2] ); } sub drop_H { my( $smiles ) = @_; $smiles =~ s/\(\[H\]\)//g; $smiles =~ s/^\[H\]\((.+)\)$/$1/; return $smiles; } sub reverse_order { my( $vertices ) = @_; my @sorted = sort { $vertices->{$b}{number} <=> $vertices->{$a}{number} } keys %$vertices; return $vertices->{shift @sorted}; } Chemistry-OpenSMILES-0.11.6/t/10_validate.t0000644000200400020040000000415514753573665020013 0ustar andriusandrius#!/usr/bin/perl use strict; use warnings; use Chemistry::OpenSMILES; use Chemistry::OpenSMILES::Parser; use Test::More; my %cases = ( '[C@]' => 'tetrahedral chiral center C(0) has 0 bonds while at least 3 are required', 'C/C(\O)=C(/C)(\O)' => 'atom C(1) has 2 bonds of type \'\\\', cis/trans definitions must not conflict', 'C(Cl)(F)(O)' => 'atom C(0) has 4 distinct neighbours, but does not have a chiral setting', 'C/C' => 'cis/trans bond is defined between atoms C(0) and C(1), but neither of them is attached to a double bond', 'C/C=C' => 'double bond between atoms C(1) and C(2) has only one cis/trans marker', # Atom coloring is not given, thus the following is not detected as unimportant chiral center 'CC(C)=[C@]=C(C)C' => undef, # COD entry 2230139, r176798, chemical name translated by OPSIN v2.8.0 # The mentioned double bond gets its marker from another double bond 'C(C=C)(=O)N1C\C(\C(/C(/C1)=C/C1=C(C=CC=C1)Cl)=O)=C/C1=C(C=CC=C1)Cl' => 'double bond between atoms C(20) and C(21) has only one cis/trans marker', # OpenSMILES specification v1.0 'NC(Br)=[C@]=C(O)C' => undef, # COD entry 1100141, r297562 'O1C2CCCc3c2c(ccc3OC)c2ccccc2C1=O' => 'aromatic bond between atoms c(7) and c(13) is outside an aromatic ring', # COD entry 1501863, r297409 'B(C(=CC(C)(C)C)c1c(F)c(F)c(F)c(F)c1F)(c1c(F)c(F)c(F)c(F)c1F)/c1c(F)c(F)c(F)c(F)c1F' => 'cis/trans bond is defined between atoms B(0) and c(29), but neither of them is attached to a double bond', # COD entry 1547257, r297409 'O=C(/C=C/c1c(OC)cccc1OC)/C=C(O)/C=C/c1c(OC)cccc1OC' => undef, # From COD entry 4501115, r297562 'OC(=C\C(=O)/C=C/c1ccc(O)c(OC)c1)/C=C/c1cc(OC)c(cc1)O' => undef, 'OC(=C\C/C=C/c1ccc(O)c(OC)c1)/C=C/c1cc(OC)c(cc1)O' => undef, ); plan tests => scalar keys %cases; for (sort keys %cases) { my $warning; local $SIG{__WARN__} = sub { $warning = $_[0] }; my $parser = Chemistry::OpenSMILES::Parser->new; my( $graph ) = $parser->parse( $_ ); Chemistry::OpenSMILES::_validate( $graph ); $warning =~ s/\n$// if defined $warning; is $warning, $cases{$_}, $_; } Chemistry-OpenSMILES-0.11.6/t/11_validate_color.t0000644000200400020040000000304114753573665021203 0ustar andriusandrius#!/usr/bin/perl use strict; use warnings; use Chemistry::OpenSMILES qw( clean_chiral_centers ); use Chemistry::OpenSMILES::Parser; use Test::More; my %cases = ( 'C[C@](C)(C)(C)' => 'tetrahedral chiral setting for C(1) is not needed as not all 4 neighbours (including possible lone pair) are distinct', 'C[C@](Cl)(F)(O)' => undef, 'C(Cl)(F)(O)' => 'atom C(0) has 4 distinct neighbours, but does not have a chiral setting', # Anomers must not loose chirality settings 'N[C@@]12NC(N[C@]2(NC(N1))N)' => undef, ); plan tests => 3 * scalar keys %cases; for (sort keys %cases) { my $warning; local $SIG{__WARN__} = sub { $warning = $_[0] }; my $parser = Chemistry::OpenSMILES::Parser->new; my( $graph ) = $parser->parse( $_ ); Chemistry::OpenSMILES::_validate( $graph, sub { return $_[0]->{symbol} } ); $warning =~ s/\n$// if defined $warning; is $warning, $cases{$_}; # Unnecessary chiral centers should be removed my @affected = clean_chiral_centers( $graph, sub { return $_[0]->{symbol} } ); is @affected != 0, defined $cases{$_} && $cases{$_} =~ /not needed/, "$_ - affected atoms"; # After removal, validation should pass undef $warning; Chemistry::OpenSMILES::_validate( $graph, sub { return $_[0]->{symbol} } ); $warning =~ s/\n$// if defined $warning; is !defined $warning, !defined $cases{$_} || @affected != 0, "$_ - validation"; } Chemistry-OpenSMILES-0.11.6/t/03_hcount.t0000644000200400020040000000217414753573665017523 0ustar andriusandrius#!/usr/bin/perl use strict; use warnings; use Chemistry::OpenSMILES::Parser; use Test::More; my %cases = ( 'C' => 5, 'C[U]' => 5, 'N' => 4, 'Cl' => 2, '[U]' => 1, '[Pb]' => 1, '[He]' => 1, '[*]' => 1, '[CH4]' => 5, '[ClH]' => 2, '[ClH1]' => 2, '[Cl-]' => 1, '[OH1-]' => 2, '[OH-1]' => 2, '[Cu+2]' => 1, '[Cu++]' => 1, '[13CH4]' => 5, '[2H+]' => 1, '[238U]' => 1, 'S' => 3, 'S[O]' => 3, 'S([O])([O])' => 3, 'S([O])([O])([O])' => 5, 'S([O])([O])([O])([O])' => 5, 'S([O])([O])([O])([O])([O])' => 7, 'O([O])' => 3, 'O([O])([O])' => 3, 'O([O])([O])([O])' => 4, 'CC' => 8, 'C-C' => 8, 'C=C' => 6, 'C#C' => 4, 'C$C' => 2, 'C1=CC=CC=C1' => 12, 'c1ccccc1' => 12, 'c1cncnc1' => 10, ); plan tests => scalar keys %cases; for (sort keys %cases) { my $parser = Chemistry::OpenSMILES::Parser->new; my( $graph ) = $parser->parse( $_ ); is( $graph->vertices, $cases{$_} ); } Chemistry-OpenSMILES-0.11.6/t/23_clean_chiral_centers.t0000644000200400020040000000242514753573665022353 0ustar andriusandrius#!/usr/bin/perl use strict; use warnings; use Chemistry::OpenSMILES qw(clean_chiral_centers); use Chemistry::OpenSMILES::Parser; use Chemistry::OpenSMILES::Stereo qw(chirality_to_pseudograph); use Chemistry::OpenSMILES::Writer qw(write_SMILES); use Test::More; eval 'use Graph::Nauty qw(orbits)'; plan skip_all => 'no Graph::Nauty' if $@; sub depict { my( $vertex ) = @_; if( ref $vertex && exists $vertex->{symbol} ) { return &write_SMILES; } else { return ''; } } my @orbits; sub orbit { my( $vertex ) = @_; for my $i (0..$#orbits) { return $i if grep { $_ == $vertex } @{$orbits[$i]}; } } my @cases = ( [ 'CC[C@](CO)(CCl)C', 0 ], [ 'CC[C@](CC)(CC)CC', 1 ], [ 'C[S@](O)(O)[O-]', 0 ], # FIXME: Something is off here, should be 1! # Anomers [ '[C@]1(F)(Cl)CCCCC1', 0 ], [ '[C@]1(F)(F)CCCCC1', 1 ], [ '[C@H2]1CCCCC1', 1 ], ); plan tests => scalar @cases; for my $case (@cases) { my( $smiles, $changed ) = @$case; my $parser = Chemistry::OpenSMILES::Parser->new; my( $moiety ) = $parser->parse( $smiles ); my $copy = $moiety->copy; chirality_to_pseudograph( $copy ); @orbits = orbits( $copy, \&depict ); is scalar clean_chiral_centers( $moiety, \&orbit ), $changed, $smiles; } Chemistry-OpenSMILES-0.11.6/t/12_tetrahedral_chirality.t0000644000200400020040000000146214753573665022571 0ustar andriusandrius#!/usr/bin/perl use strict; use warnings; use Chemistry::OpenSMILES::Parser; use Chemistry::OpenSMILES::Writer qw(write_SMILES); use Test::More; my @cases = ( [ 'Br[C@H](Cl)(I)', 'Br([C@](Cl)(I)([H]))', '[H]([C@](I)(Cl)(Br))' ], ); plan tests => 2 * scalar @cases; for my $case (@cases) { my $parser; my @moieties; my $result; $parser = Chemistry::OpenSMILES::Parser->new; @moieties = $parser->parse( $case->[0] ); $result = write_SMILES( \@moieties ); is( $result, $case->[1] ); $result = write_SMILES( \@moieties, \&reverse_order ); is( $result, $case->[2] ); } sub reverse_order { my( $vertices ) = @_; my @sorted = sort { $vertices->{$b}{number} <=> $vertices->{$a}{number} } keys %$vertices; return $vertices->{shift @sorted}; } Chemistry-OpenSMILES-0.11.6/t/27_valence.t0000644000200400020040000000112114753573665017635 0ustar andriusandrius#!/usr/bin/perl use strict; use warnings; use Chemistry::OpenSMILES qw( valence ); use Chemistry::OpenSMILES::Parser; use Test::More; my @cases = ( [ 'C', '4,1,1,1,1' ], [ '[C]', '0' ], [ 'CCC', '4,4,4,1,1,1,1,1,1,1,1' ], [ '[C@](C)(N)(O)', '3,4,3,2,1,1,1,1,1,1' ], ); plan tests => scalar @cases; for my $case (@cases) { my $result; my $parser = Chemistry::OpenSMILES::Parser->new; my( $moiety ) = $parser->parse( $case->[0] ); is join( ',', map { valence( $moiety, $_ ) } sort { $a->{number} <=> $b->{number} } $moiety->vertices ), $case->[1]; } Chemistry-OpenSMILES-0.11.6/t/02_chains.t0000644000200400020040000000357714753573665017477 0ustar andriusandrius#!/usr/bin/perl use strict; use warnings; use List::Util qw(sum); use Chemistry::OpenSMILES::Parser; use Test::More; my %cases = ( 'CC' => [ 2, 1 ], 'CCO' => [ 3, 2 ], 'NCCCC' => [ 5, 4 ], 'CCCCN' => [ 5, 4 ], 'C=C' => [ 2, 1 ], 'C#N' => [ 2, 1 ], 'CC#CC' => [ 4, 3 ], 'CCC=O' => [ 4, 3 ], '[Rh-](Cl)(Cl)(Cl)(Cl)$[Rh-](Cl)(Cl)(Cl)Cl' => [ 10, 9 ], 'C-C' => [ 2, 1 ], 'CCC(CC)CO' => [ 7, 6 ], 'CC(C)C(=O)C(C)C' => [ 8, 7 ], 'OCC(CCC)C(C(C)C)CCC' => [ 13, 12 ], 'OS(=O)(=S)O' => [ 5, 4 ], 'C(C(C(C(C(C(C(C(C(C(C(C(C(C(C(C(C(C(C(C(C))))))))))))))))))))C' => [ 22, 21 ], 'C1CCCCC1' => [ 6, 6 ], 'N1CC2CCCCC2CC1' => [ 10, 11 ], 'C=1CCCCC=1' => [ 6, 6 ], 'C1CCCCC1C1CCCCC1' => [ 12, 13 ], 'C1CCCCC1C2CCCCC2' => [ 12, 13 ], 'C0CCCCC0' => [ 6, 6 ], 'C%25CCCCC%25' => [ 6, 6 ], 'C1CCCCC%01' => [ 6, 6 ], 'C12(CCCCC1)CCCCC2' => [ 11, 12 ], # The following case is not allowed by OpenSMILES specification, # however, it is easier to support it than forbid. 'C(CCCCC1)12CCCCC2' => [ 11, 12 ], '[Na+].[Cl-]' => [ 2, 2, 0 ], 'c1cc(O.NCCO)ccc1' => [ 2, 11, 10 ], 'Oc1cc(.NCCO)ccc1' => [ 2, 11, 10 ], 'C1.C1' => [ 1, 2, 1 ], 'C1.C12.C2' => [ 1, 3, 2 ], 'c1c2c3c4cc1.Br2.Cl3.Cl4' => [ 1, 9, 9 ], 'C(C(C1))C1' => [ 4, 4 ], ); plan tests => 3 * scalar keys %cases; for my $case (sort keys %cases) { my $parser = Chemistry::OpenSMILES::Parser->new; my @graphs = $parser->parse( $case, { raw => 1 } ); is( scalar @graphs, @{$cases{$case}} == 3 ? $cases{$case}->[0] : 1 ); is( sum( map { scalar $_->vertices } @graphs ), $cases{$case}->[-2] ); is( sum( map { scalar $_->edges } @graphs ), $cases{$case}->[-1] ); } Chemistry-OpenSMILES-0.11.6/t/16_unsupported_chirality.t0000644000200400020040000000147214753573665022667 0ustar andriusandrius#!/usr/bin/perl use strict; use warnings; use Chemistry::OpenSMILES::Parser; use Chemistry::OpenSMILES::Writer qw(write_SMILES); use Test::More; my @cases = ( [ 'N[C@](Br)(O)C', 'N([C@](Br)(O([H]))(C([H])([H])([H])))([H])([H])', undef ], [ 'NC(Br)=[C@]=C(O)C', 'N(C(Br)(=C(=C(O([H]))(C([H])([H])([H])))))([H])([H])', undef ], [ 'N[C@](Br)(O)(C)(Cl)', 'N([C@TB1](Br)(O([H]))(C([H])([H])([H]))(Cl))([H])([H])', undef ], ); plan tests => 2 * scalar @cases; for my $case (@cases) { my $warning; local $SIG{__WARN__} = sub { $warning = $_[0] }; my $parser = Chemistry::OpenSMILES::Parser->new; my @moieties = $parser->parse( $case->[0] ); is( write_SMILES( \@moieties ), $case->[1] ); $warning =~ s/\n$// if $warning; is $warning, $case->[2]; } Chemistry-OpenSMILES-0.11.6/t/31_write.t0000644000200400020040000000272614753573665017361 0ustar andriusandrius#!/usr/bin/perl use strict; use warnings; use Chemistry::OpenSMILES::Parser; use Chemistry::OpenSMILES::Writer qw(write_SMILES); use Test::More; my @cases = ( [ '[CH:4](=[CH:3]/[C:2][O:1])\[C:5][O:6]', '[C:4](=[C:3](/[C:2]([O:1]))([H]))(\[C:5]([O:6]))([H])', '[H]([C:3](/[C:2]([O:1]))(=[C:4]([H])(\[C:5]([O:6]))))', '[O:1]([C:2](\[C:3](=[C:4](\[C:5]([O:6]))([H]))([H])))' ], ); plan tests => 3 * scalar @cases; for my $case (@cases) { my $parser; my @moieties; my $result; $parser = Chemistry::OpenSMILES::Parser->new; @moieties = $parser->parse( $case->[0] ); $result = write_SMILES( \@moieties ); is $result, $case->[1]; $result = write_SMILES( \@moieties, { order_sub => \&reverse_order } ); is $result, $case->[2]; $result = write_SMILES( \@moieties, { order_sub => \&class_order } ); is $result, $case->[3]; } sub reverse_order { my $vertices = shift; my @sorted = sort { $vertices->{$b}{number} <=> $vertices->{$a}{number} } keys %$vertices; return $vertices->{shift @sorted}; } sub class_order { my $vertices = shift; my @classed = grep { $vertices->{$_}{class} } keys %$vertices; my @classless = grep { !$vertices->{$_}{class} } keys %$vertices; my @sorted = ( (sort { $vertices->{$a}{class} <=> $vertices->{$b}{class} } @classed), (sort { $vertices->{$a}{number} <=> $vertices->{$b}{number} } @classless) ); return $vertices->{shift @sorted}; } Chemistry-OpenSMILES-0.11.6/t/07_cistrans.t0000644000200400020040000000254014753573665020052 0ustar andriusandrius#!/usr/bin/perl use strict; use warnings; use Chemistry::OpenSMILES::Parser; use Chemistry::OpenSMILES::Writer qw(write_SMILES); use Test::More; my @cases = ( [ 'Br/C=C/F', 'Br(/C(=C(/F)))', 'F(\C(=C(\Br)))' ], [ 'C(\Br)=C/F', 'C(\Br)(=C(/F))', 'F(\C(=C(\Br)))' ], [ 'Br\C=C/F', 'Br(\C(=C(/F)))', 'F(\C(=C(/Br)))' ], [ 'C(/Br)=C/F', 'C(/Br)(=C(/F))', 'F(\C(=C(/Br)))' ], # Adapted from COD entry 1100225: [ 'Cl/C(=C\1COCN1)C', 'Cl(/C(=C\1(C(O(C(N/1)))))(C))', 'C(C(=C1(\N(C(O(C1)))))(\Cl))' ], # The following two cases are synonymous: [ 'C\1CCOC/1=C/O', 'C\1(C(C(O(C/1(=C(/O))))))', 'O(\C(=C/1(O(C(C(C\1))))))' ], [ 'C1CCOC/1=C/O', 'C\1(C(C(O(C/1(=C(/O))))))', 'O(\C(=C/1(O(C(C(C\1))))))' ], ); plan tests => 2 * scalar @cases; for my $case (@cases) { my $parser; my @moieties; my $result; $parser = Chemistry::OpenSMILES::Parser->new; @moieties = $parser->parse( $case->[0], { raw => 1 } ); $result = write_SMILES( \@moieties, { raw => 1 } ); is $result, $case->[1]; $result = write_SMILES( \@moieties, { raw => 1, order_sub => \&reverse_order } ); is $result, $case->[2]; } sub reverse_order { my( $vertices ) = @_; my @sorted = sort { $vertices->{$b}{number} <=> $vertices->{$a}{number} } keys %$vertices; return $vertices->{shift @sorted}; } Chemistry-OpenSMILES-0.11.6/t/06_write.t0000644000200400020040000000315714753573665017362 0ustar andriusandrius#!/usr/bin/perl use strict; use warnings; use Chemistry::OpenSMILES::Parser; use Chemistry::OpenSMILES::Writer qw(write_SMILES); use Test::More; my @cases = ( [ 'C', 'C' ], [ 'C=C', 'C(=C)' ], [ 'C=1=C=C=C=1', 'C=1(=C(=C(=C=1)))' ], [ 'C#C.c1ccccc1', 'C(#C).c:1(:c(:c(:c(:c(:c:1)))))' ], [ 'C1CC2CCCCC2CC1', 'C1(C(C2(C(C(C(C(C2(C(C1)))))))))' ], # A strange way to write fused rings: [ 'C1(CCCCC11)(CCCC1)', 'C12(C(C(C(C(C1(C(C(C(C2)))))))))' ], # Single bonds between two aromatic atoms must be explicitly represented: [ 'c1cc-ccc1', 'c:1(:c(:c(-c(:c(:c:1)))))' ], # Chirality information is preserved: [ 'N[C@](Br)(O)C', 'N([C@](Br)(O)(C))' ], [ 'N[C@@](Br)(O)C', 'N([C@@](Br)(O)(C))' ], # A regression test for previously incorrectly identified aromatic bond: [ 'c1(c(cccc1)F)C(=O)[O-]', 'c:1(:c(:c(:c(:c(:c:1))))(F))(C(=O)([O-]))' ], # Cyclooctatetraene adapted from OpenSMILES v1.0 specification: [ 'C/1=C/C=C\C=C/C=C\1', 'C/1(=C(/C(=C(\C(=C(/C(=C\1)))))))' ], # A regression test for impropertly recorded fact that 0 H atoms are present: [ '[C]#[O]', '[C](#[O])' ], ); plan tests => 2 * scalar @cases; for my $case (@cases) { my $parser; my @moieties; my $result; $parser = Chemistry::OpenSMILES::Parser->new; @moieties = $parser->parse( $case->[0], { raw => 1 } ); $result = write_SMILES( \@moieties, { raw => 1 } ); is $result, $case->[1]; $parser = Chemistry::OpenSMILES::Parser->new; @moieties = $parser->parse( $result, { raw => 1 } ); $result = write_SMILES( \@moieties, { raw => 1 } ); is $result, $case->[1]; } Chemistry-OpenSMILES-0.11.6/t/29_trigonal_bipyramidal_chirality.t0000644000200400020040000000134214753573665024473 0ustar andriusandrius#!/usr/bin/perl use strict; use warnings; use Algorithm::Combinatorics qw( permutations ); use Chemistry::OpenSMILES::Writer; use Test::More; my @order_permutations = permutations( [ 0..4 ] ); plan tests => @order_permutations * 20; for my $permutation (@order_permutations) { for (1..20) { my $chirality = Chemistry::OpenSMILES::Writer::_trigonal_bipyramidal_chirality( @$permutation, '@TB' . $_ ); my @reverse_permutation = reverse_permutation( @$permutation ); is Chemistry::OpenSMILES::Writer::_trigonal_bipyramidal_chirality( @reverse_permutation, $chirality ), '@TB' . $_; } } sub reverse_permutation { my @order = @_; return sort { $order[$a] <=> $order[$b] } 0..$#order; } Chemistry-OpenSMILES-0.11.6/lib/0000775000200400020040000000000014753573665016035 5ustar andriusandriusChemistry-OpenSMILES-0.11.6/lib/Chemistry/0000775000200400020040000000000014753573665020004 5ustar andriusandriusChemistry-OpenSMILES-0.11.6/lib/Chemistry/OpenSMILES.pm0000644000200400020040000007037614753573665022173 0ustar andriusandriuspackage Chemistry::OpenSMILES; # ABSTRACT: OpenSMILES format reader and writer our $VERSION = '0.11.6'; # VERSION use strict; use warnings; use 5.0100; use Chemistry::OpenSMILES::Stereo::Tables qw( @OH @TB ); use Graph::Traversal::BFS; use List::Util qw( all any first max min none sum0 ); require Exporter; our @ISA = qw( Exporter ); our @EXPORT_OK = qw( %bond_order_to_symbol %bond_symbol_to_order %normal_valence clean_chiral_centers is_aromatic is_aromatic_bond is_chiral is_chiral_allenal is_chiral_octahedral is_chiral_planar is_chiral_tetrahedral is_chiral_trigonal_bipyramidal is_cis_trans_bond is_double_bond is_ring_atom is_ring_bond is_single_bond is_triple_bond mirror toggle_cistrans valence ); sub is_chiral($); sub is_chiral_planar($); sub is_chiral_tetrahedral($); sub mirror($); sub toggle_cistrans($); our %normal_valence = ( B => [ 3 ], C => [ 4 ], N => [ 3, 5 ], O => [ 2 ], P => [ 3, 5 ], S => [ 2, 4, 6 ], F => [ 1 ], Cl => [ 1 ], Br => [ 1 ], I => [ 1 ], c => [ 3 ], # Not from OpenSMILES specification ); our %bond_order_to_symbol = ( 1 => '-', 1.5 => ':', 2 => '=', 3 => '#', 4 => '$', ); our %bond_symbol_to_order = ( '-' => 1, ':' => 1.5, '=' => 2, '#' => 3, '$' => 4, ); # Removes chiral setting from allenal, square planar, tetrahedral and trigonal bipyramidal chiral centers if deemed unimportant. # For allenal, tetrahedral and trigonal bipyramidal arrangements when not all the neighbours are distinct. # For square planar arrangements this means situations when all neighbours are the same. # Chiral centers with lone pairs are left untouched. # Returns the affected atoms. # # TODO: check other chiral centers sub clean_chiral_centers($$) { my( $moiety, $color_sub ) = @_; my @affected; for my $atom ($moiety->vertices) { next unless is_chiral_allenal( $atom ) || is_chiral_planar( $atom ) || is_chiral_tetrahedral( $atom ) || is_chiral_trigonal_bipyramidal( $atom ); # Find neighbours which constitute ring bonds with the atom in question my @ring_neighbours = grep { is_ring_bond( $moiety, $atom, $_, scalar $moiety->edges ) } $moiety->neighbours( $atom ); my $hcount = exists $atom->{hcount} ? $atom->{hcount} : 0; my @neighbours = $moiety->neighbours( $atom ); if( is_chiral_allenal( $atom ) ) { @neighbours = grep { $_ != $atom } map { $moiety->neighbours( $_ ) } @neighbours; } if( is_chiral_trigonal_bipyramidal( $atom ) ) { next if @neighbours + $hcount != 5; } else { next if @neighbours + $hcount != 4; } my %colors; for (@neighbours, ( { symbol => 'H' } ) x $hcount) { $colors{$color_sub->( $_ )}++; } if( is_chiral_planar( $atom ) ) { # Chiral planar center markers make sense even if only two types of atoms are there. next if scalar keys %colors > 2; next if scalar keys %colors == 2 && all { $_ == 2 } values %colors; } elsif( is_chiral_trigonal_bipyramidal( $atom ) ) { next if scalar keys %colors == 5; } else { next if scalar keys %colors == 4; } # Special treatment for anomers if( @ring_neighbours ) { next unless is_chiral_tetrahedral( $atom ); next unless @ring_neighbours == 2; next if $hcount == 1; if( !$hcount ) { my @non_ring_neighbours = grep { $_ != $ring_neighbours[0] && $_ != $ring_neighbours[1] } @neighbours; next unless $color_sub->( $non_ring_neighbours[0] ) eq $color_sub->( $non_ring_neighbours[1] ); } } delete $atom->{chirality}; push @affected, $atom; } return @affected; } sub is_aromatic($) { my( $atom ) = @_; return $atom->{symbol} ne ucfirst $atom->{symbol}; } sub is_aromatic_bond { my( $moiety, $a, $b ) = @_; return $moiety->has_edge_attribute( $a, $b, 'bond' ) && $moiety->get_edge_attribute( $a, $b, 'bond' ) eq ':'; } sub is_chiral($) { my( $what ) = @_; if( ref $what eq 'HASH' ) { # Single atom return exists $what->{chirality}; } else { # Graph representing moiety return any { is_chiral( $_ ) } $what->vertices; } } sub is_chiral_allenal($) { my( $what ) = @_; if( ref $what eq 'HASH' ) { # Single atom return $what->{chirality} && $what->{chirality} =~ /^\@AL[12]$/; } else { # Graph representing moiety return any { is_chiral_allenal( $_ ) } $what->vertices; } } sub is_chiral_planar($) { my( $what ) = @_; if( ref $what eq 'HASH' ) { # Single atom return $what->{chirality} && $what->{chirality} =~ /^\@SP[123]$/; } else { # Graph representing moiety return any { is_chiral_planar( $_ ) } $what->vertices; } } sub is_chiral_tetrahedral($) { my( $what ) = @_; if( ref $what eq 'HASH' ) { # Single atom # CAVEAT: will fail for allenal configurations of @/@@ in raw mode return $what->{chirality} && $what->{chirality} =~ /^@@?$/; } else { # Graph representing moiety return any { is_chiral_tetrahedral( $_ ) } $what->vertices; } } sub is_chiral_trigonal_bipyramidal($) { my( $what ) = @_; if( ref $what eq 'HASH' ) { # Single atom return $what->{chirality} && $what->{chirality} =~ /^\@TB([1-9]|1[0-9]|20)$/; } else { # Graph representing moiety return any { is_chiral_trigonal_bipyramidal( $_ ) } $what->vertices; } } sub is_chiral_octahedral($) { my( $what ) = @_; if( ref $what eq 'HASH' ) { # Single atom return $what->{chirality} && $what->{chirality} =~ /^\@OH([1-9]|[12][0-9]|30)$/; } else { # Graph representing moiety return any { is_chiral_octahedral( $_ ) } $what->vertices; } } sub is_cis_trans_bond { my( $moiety, $a, $b ) = @_; return $moiety->has_edge_attribute( $a, $b, 'bond' ) && $moiety->get_edge_attribute( $a, $b, 'bond' ) =~ /^[\\\/]$/; } sub is_double_bond { my( $moiety, $a, $b ) = @_; return $moiety->has_edge_attribute( $a, $b, 'bond' ) && $moiety->get_edge_attribute( $a, $b, 'bond' ) eq '='; } # An atom is deemed to be a ring atom if any of its bonds is a ring bond. sub is_ring_atom { my( $moiety, $atom, $max_length ) = @_; return '' unless $moiety->degree( $atom ) > 1; return any { is_ring_bond( $moiety, $atom, $_, $max_length ) } $moiety->neighbours( $atom ); } # A bond is deemed to be a ring bond if there is an alternative path # joining its atoms not including the bond in consideration and this # alternative path is not longer than 7 bonds. This is based on # O'Boyle (2012) saying that Open Babel SMILES writer does not output # cis/trans markers for double bonds in rings of size 8 or less due to # them implicilty being cis bonds. # # If maximum ring size is given negative, ring size is not limited. sub is_ring_bond { my( $moiety, $a, $b, $max_length ) = @_; $max_length = 7 unless $max_length; # A couple of shortcuts to reduce the complexity return '' if any { $moiety->degree( $_ ) == 1 } ( $a, $b ); return '' if $moiety->vertices > $moiety->edges; if( $max_length < 0 ) { # Due to the issue in Graph, bridges() returns strings instead of real objects. # Graph issue: https://github.com/graphviz-perl/Graph/issues/29 my %vertices_by_name = map { $_ => $_ } $moiety->vertices; return none { ( $_->[0] == $a && $_->[1] == $b ) || ( $_->[0] == $b && $_->[1] == $a ) } map { [ map { $vertices_by_name{$_} } @$_ ] } $moiety->bridges; } my $copy = $moiety->copy; $copy->delete_edge( $a, $b ); my %distance = ( $a => 0 ); my $record_length = sub { # Record number of bonds between $a and any other vertex my( $u, $v ) = @_; my @seen = grep { exists $distance{$_} } ( $u, $v ); return '' if @seen != 1; # Can this be 0? my $seen = shift @seen; my $unseen = first { !exists $distance{$_} } ( $u, $v ); $distance{$unseen} = $distance{$seen} + 1; }; my $operations = { start => sub { $a }, tree_edge => $record_length, }; my $traversal = Graph::Traversal::BFS->new( $copy, %$operations ); $traversal->bfs; # $distance{$b} is the distance in bonds. In 8-member rings adjacent # ring atoms have distance of 7 bonds. return exists $distance{$b} && $distance{$b} <= $max_length; } sub is_single_bond { my( $moiety, $a, $b ) = @_; return !$moiety->has_edge_attribute( $a, $b, 'bond' ) || $moiety->get_edge_attribute( $a, $b, 'bond' ) eq '-'; } sub is_triple_bond { my( $moiety, $a, $b ) = @_; return $moiety->has_edge_attribute( $a, $b, 'bond' ) && $moiety->get_edge_attribute( $a, $b, 'bond' ) eq '#'; } sub mirror($) { my( $what ) = @_; if( ref $what eq 'HASH' ) { # Single atom if( is_chiral_tetrahedral( $what ) ) { $what->{chirality} = $what->{chirality} eq '@' ? '@@' : '@'; } if( is_chiral_allenal( $what ) ) { $what->{chirality} = $what->{chirality} eq '@AL1' ? '@AL2' : '@AL1'; } # Square planar centers are not affected by mirroring, doing nothing if( is_chiral_trigonal_bipyramidal( $what ) ) { my $number = substr $what->{chirality}, 3; my $setting = $TB[$number-1]; my $opposite = first { $TB[$_]->{axis}[0] == $setting->{axis}[0] && $TB[$_]->{axis}[1] == $setting->{axis}[1] && $TB[$_]->{order} ne $setting->{order} } 0..$#TB; $what->{chirality} = '@TB' . ($opposite + 1); } if( is_chiral_octahedral( $what ) ) { my $number = substr $what->{chirality}, 3; my $setting = $OH[$number-1]; my $opposite = first { $OH[$_]->{shape} eq $setting->{shape} && $OH[$_]->{axis}[0] == $setting->{axis}[0] && $OH[$_]->{axis}[1] == $setting->{axis}[1] && $OH[$_]->{order} ne $setting->{order} } 0..$#OH; $what->{chirality} = '@OH' . ($opposite + 1); } } else { for ($what->vertices) { mirror( $_ ); } } } sub toggle_cistrans($) { return $_[0] eq '/' ? '\\' : '/'; } sub valence($$) { my( $moiety, $atom ) = @_; return ($atom->{hcount} ? $atom->{hcount} : 0) + sum0 map { exists $bond_symbol_to_order{$_} ? $bond_symbol_to_order{$_} : 1 } map { $moiety->has_edge_attribute( $atom, $_, 'bond' ) ? $moiety->get_edge_attribute( $atom, $_, 'bond' ) : 1 } $moiety->neighbours( $atom ); } # CAVEAT: requires output from non-raw parsing due issue similar to GH#2 sub _validate($@) { my( $moiety, $color_sub ) = @_; # Identify islands of allene systems my $allenes = _allene_graph( $moiety ); my $color_by_element = sub { $_[0]->{symbol} }; for my $atom (sort { $a->{number} <=> $b->{number} } $moiety->vertices) { if( is_chiral_allenal($atom) ) { if( $moiety->degree($atom) != 2 ) { warn sprintf 'tetrahedral chiral allenal setting for %s(%d) ' . 'has %d bonds while 2 are needed' . "\n", $atom->{symbol}, $atom->{number}, $moiety->degree($atom); next; } if( !$allenes->has_vertex($atom) ) { warn sprintf 'tetrahedral chiral allenal setting for %s(%d) ' . 'is not a part of any allenal system' . "\n", $atom->{symbol}, $atom->{number}; next; } if( none { $allenes->has_edge_attribute( $atom, $_, 'allene' ) && $allenes->get_edge_attribute( $atom, $_, 'allene' ) eq 'mid' } $allenes->neighbours($atom) ) { warn sprintf 'tetrahedral chiral allenal setting for %s(%d) ' . 'observed for an atom which is not a center of ' . 'an allenal system' . "\n", $atom->{symbol}, $atom->{number}; next; } next unless $color_sub; next if is_ring_atom( $moiety, $atom, scalar $moiety->edges ); my @ends = grep { $allenes->has_edge_attribute( $atom, $_, 'allene' ) && $allenes->get_edge_attribute( $atom, $_, 'allene' ) eq 'mid' } $allenes->neighbours($atom); my @neighbours = grep { $_ ne $ends[0] && $_ ne $ends[1] } map { @$_ } grep { !$allenes->has_edge( @$_ ) } map { $moiety->edges_at($_) } @ends; my %colors = map { ($color_sub->( $_ ) => 1) } @neighbours; if( scalar keys %colors != 4 ) { # FIXME: Emits false positives for coordinating metals. # Need to think of a heuristic to exclude them. warn sprintf 'tetrahedral chiral allenal setting for ' . '%s(%d) is not needed as not all 4 neighbours ' . 'are distinct' . "\n", $atom->{symbol}, $atom->{number}; } } elsif( is_chiral_tetrahedral($atom) ) { if( $moiety->degree($atom) < 3 ) { # TODO: there should be a strict mode to forbid lone pairs warn sprintf 'tetrahedral chiral center %s(%d) has %d bonds ' . 'while at least 3 are required' . "\n", $atom->{symbol}, $atom->{number}, $moiety->degree($atom); next; } if( $moiety->degree($atom) > 4 ) { warn sprintf 'tetrahedral chiral center %s(%d) has %d bonds ' . 'while at most 4 are allowed' . "\n", $atom->{symbol}, $atom->{number}, $moiety->degree($atom); next; } next unless $color_sub; next if is_ring_atom( $moiety, $atom, scalar $moiety->edges ); my $has_lone_pair = $moiety->degree($atom) == 3; my %colors = map { ($color_sub->( $_ ) => 1) } $moiety->neighbours($atom); if( scalar keys %colors != 4 - $has_lone_pair ) { warn sprintf 'tetrahedral chiral setting for %s(%d) ' . 'is not needed as not all 4 neighbours ' . '(including possible lone pair) are distinct' . "\n", $atom->{symbol}, $atom->{number}; } } elsif( !is_chiral($atom) && $moiety->degree($atom) == 4 ) { # Warn about unmarked tetrahedral chiral centers my %colors = map { $color_sub ? ($color_sub->($_) => 1) : ($color_by_element->($_) => 1) } $moiety->neighbours($atom); if( scalar keys %colors == 4 ) { warn sprintf 'atom %s(%d) has 4 distinct neighbours, ' . 'but does not have a chiral setting' . "\n", $atom->{symbol}, $atom->{number}; } } } for my $bond (sort { min( map { $_->{number} } @$a ) <=> min( map { $_->{number} } @$b ) || max( map { $_->{number} } @$a ) <=> max( map { $_->{number} } @$b ) } $moiety->edges) { my( $A, $B ) = sort { $a->{number} <=> $b->{number} } @$bond; if( $A eq $B ) { warn sprintf 'atom %s(%d) has bond to itself' . "\n", $A->{symbol}, $A->{number}; next; } if( is_double_bond( $moiety, @$bond ) ) { # Test cis/trans bonds # Detect conflicting cis/trans markers, see COD entry 1547257, r297409 my $cis_trans_A = grep { is_cis_trans_bond( $moiety, $A, $_ ) } $moiety->neighbours($A); my $cis_trans_B = grep { is_cis_trans_bond( $moiety, $B, $_ ) } $moiety->neighbours($B); if( $cis_trans_A && $cis_trans_B ) { # If any of the bond atoms lack cis/trans markers, it means that the other markers are from some other bond for my $atom (@$bond) { my %bond_types = _neighbours_per_bond_type( $moiety, $atom ); for ('/', '\\') { if( $bond_types{$_} && @{$bond_types{$_}} > 1 ) { warn sprintf 'atom %s(%d) has %d bonds of type \'%s\', ' . 'cis/trans definitions must not conflict' . "\n", $atom->{symbol}, $atom->{number}, scalar @{$bond_types{$_}}, $_; } } } } elsif( !$allenes->has_edge( @$bond ) && # Allene systems are checked below $cis_trans_A + $cis_trans_B == 1 ) { # FIXME: Source of false-positives. # Cis/trans bond is out of place if none of neighbouring double bonds have other cis/trans bonds. # This has to include allenal systems. warn sprintf 'double bond between atoms %s(%d) and %s(%d) ' . 'has only one cis/trans marker' . "\n", $A->{symbol}, $A->{number}, $B->{symbol}, $B->{number}; } } elsif( is_cis_trans_bond( $moiety, @$bond ) ) { # Test if next to a double bond. # FIXME: Yields false-positives for delocalised bonds, # see COD entry 1501863. # FIXME: What about triple bond? See COD entry 4103591. my %bond_types; for my $atom (@$bond) { my %bond_types_now = _neighbours_per_bond_type( $moiety, $atom ); for my $key (keys %bond_types_now) { push @{$bond_types{$key}}, @{$bond_types_now{$key}}; } } if( !$bond_types{'='} ) { warn sprintf 'cis/trans bond is defined between atoms ' . '%s(%d) and %s(%d), but neither of them ' . 'is attached to a double bond' . "\n", $A->{symbol}, $A->{number}, $B->{symbol}, $B->{number}; } } } # Check allene systems for my $system (sort { min( map { $_->{number} } @$a ) <=> min( map { $_->{number} } @$b ) } $allenes->connected_components) { next if @$system % 2; my @ends = sort { $a->{number} <=> $b->{number} } map { @$_ } grep { $allenes->has_edge_attribute( @$_, 'allene' ) && $allenes->get_edge_attribute( @$_, 'allene' ) eq 'end' } $allenes->subgraph($system)->edges; my $cis_trans_bonds = grep { is_cis_trans_bond( $moiety, @$_ ) } map { $moiety->edges_at( $_ ) } @ends; if( $cis_trans_bonds == 1 ) { warn sprintf 'allene system between atoms %s(%d) and %s(%d) ' . 'has only one cis/trans marker' . "\n", $ends[0]->{symbol}, $ends[0]->{number}, $ends[1]->{symbol}, $ends[1]->{number}; } next if $cis_trans_bonds; my @neighbours_at_ends = grep { $_ ne $ends[0] && $_ ne $ends[1] } map { @$_ } grep { !is_double_bond( $moiety, @$_ ) } map { $moiety->edges_at( $_ ) } @ends; next unless @neighbours_at_ends == 4; warn sprintf 'allene system between atoms %s(%d) and %s(%d) ' . 'has 4 neighbours, but does not have cis/trans ' . 'setting' . "\n", $ends[0]->{symbol}, $ends[0]->{number}, $ends[1]->{symbol}, $ends[1]->{number}; } # Check for bridging aromatic bonds my $aromatic = $moiety->copy_graph; $aromatic->delete_edges( map { @$_ } grep { !is_aromatic_bond( $moiety, @$_ ) } $moiety->edges ); # Due to the issue in Graph, bridges() returns strings instead of real objects. # Graph issue: https://github.com/graphviz-perl/Graph/issues/29 # The code below works on buggy (< 0.9727) as well as fixed (>= 0.9727) versions. my %vertices_by_name = map { $_ => $_ } $aromatic->vertices; my @bridges = map { [ map { $vertices_by_name{$_} } @$_ ] } $aromatic->bridges; for my $bridge (sort { min( map { $_->{number} } @$a ) <=> min( map { $_->{number} } @$b ) || max( map { $_->{number} } @$a ) <=> max( map { $_->{number} } @$b ) } @bridges) { my( $A, $B ) = sort { $a->{number} <=> $b->{number} } @$bridge; warn sprintf 'aromatic bond between atoms %s(%d) and %s(%d) ' . 'is outside an aromatic ring' . "\n", $A->{symbol}, $A->{number}, $B->{symbol}, $B->{number}; } # TODO: SP, TB, OH chiral centers } sub _allene_graph { my( $moiety ) = @_; my $graph = $moiety->copy; $graph->delete_edges( map { @$_ } grep { !is_double_bond( $moiety, @$_ ) } $moiety->edges ); $graph->delete_vertices( grep { !$graph->degree( $_ ) } $graph->vertices ); for my $system ($graph->connected_components) { my @d1 = grep { $graph->degree( $_ ) == 1 } @$system; my @d2 = grep { $graph->degree( $_ ) == 2 } @$system; if (@d1 == 2 && @d2 && @d1 + @d2 == @$system ) { if( @d2 % 2 ) { my( $center ) = $graph->subgraph( $system )->center_vertices; $graph->set_edge_attribute( $center, $d1[0], 'allene', 'mid' ); $graph->set_edge_attribute( $center, $d1[1], 'allene', 'mid' ); } $graph->set_edge_attribute( @d1, 'allene', 'end' ); } else { $graph->delete_vertices( @$system ); } } return $graph; } sub _neighbours_per_bond_type { my( $moiety, $atom ) = @_; my %bond_types; for my $neighbour ($moiety->neighbours($atom)) { my $bond_type; if( $moiety->has_edge_attribute( $atom, $neighbour, 'bond' ) ) { $bond_type = $moiety->get_edge_attribute( $atom, $neighbour, 'bond' ); } else { $bond_type = ''; } if( $bond_type =~ /^[\\\/]$/ && $atom->{number} > $neighbour->{number} ) { $bond_type = toggle_cistrans $bond_type; } push @{$bond_types{$bond_type}}, $neighbour; } return %bond_types; } 1; __END__ =pod =head1 NAME Chemistry::OpenSMILES - OpenSMILES format reader and writer =head1 SYNOPSIS use Chemistry::OpenSMILES::Parser; my $parser = Chemistry::OpenSMILES::Parser->new; my @moieties = $parser->parse( 'C#C.c1ccccc1' ); $\ = "\n"; for my $moiety (@moieties) { # $moiety is a Graph::Undirected object print scalar $moiety->vertices; print scalar $moiety->edges; } use Chemistry::OpenSMILES::Writer qw(write_SMILES); print write_SMILES( \@moieties ); =head1 DESCRIPTION Chemistry::OpenSMILES provides support for SMILES chemical identifiers conforming to OpenSMILES v1.0 specification (L). Chemistry::OpenSMILES::Parser reads in SMILES strings and returns them parsed to arrays of L objects. Each atom is represented by a hash. Chemistry::OpenSMILES::Writer performs the inverse operation. Generated SMILES strings are by no means optimal. =head2 Molecular graph Disconnected parts of a compound are represented as separate L objects. Atoms are represented as vertices, and bonds are represented as edges. =head3 Atoms Atoms, or vertices of a molecular graph, are represented as hash references: { "symbol" => "C", "isotope" => 13, "chirality" => "@@", "hcount" => 3, "charge" => 1, "class" => 0, "number" => 0, } Except for C, C and C, all keys of hash are optional. Per OpenSMILES specification, default values for C and C are 0. For chiral atoms, the order of its neighbours in input is preserved in an array added as value for C key of the atom hash. =head3 Bonds Bonds, or edges of a molecular graph, rely completely on L internal representation. Bond orders other than single (C<->, which is also a default) are represented as values of edge attribute C. They correspond to the symbols used in OpenSMILES specification. =head2 Options C accepts the following options for key-value pairs in an anonymous hash for its second parameter: =over =item C In OpenSMILES specification the number of attached hydrogen atoms for atoms in square brackets is limited to 9. IUPAC SMILES+ has increased this number to 99. With the value of C the parser could be instructed to allow other than 1 digit for attached hydrogen count. =item C With C set to anything evaluating to true, the parser will not convert neither implicit nor explicit hydrogen atoms in square brackets to atom hashes of their own. Moreover, it will not attempt to unify the representations of chirality. It should be noted, though, that many of subroutines of Chemistry::OpenSMILES expect non-raw data structures, thus processing raw output may produce distorted results. In particular, C calls from L have to be instructed to expect raw data structure: write_SMILES( \@moieties, { raw => 1 } ); =back =head1 CAVEATS Deprecated charge notations (C<--> and C<++>) are supported. OpenSMILES specification mandates a strict order of ring bonds and branches: branched_atom ::= atom ringbond* branch* Chemistry::OpenSMILES::Parser supports both the mandated, and inverted structure, where ring bonds follow branch descriptions. Whitespace is not supported yet. SMILES descriptors must be cleaned of it before attempting reading with Chemistry::OpenSMILES::Parser. The derivation of implicit hydrogen counts for aromatic atoms is not unambiguously defined in the OpenSMILES specification. Thus only aromatic carbon is accounted for as if having valence of 3. Chiral atoms with three neighbours are interpreted as having a lone pair of electrons as the fourth chiral neighbour. The lone pair is always understood as being the second in the order of neighbour enumeration, except when the atom with the lone pair starts a chain. In that case lone pair is the first. =head1 SEE ALSO perl(1) =head1 AUTHORS Andrius Merkys, Emerkys@cpan.orgE =cut Chemistry-OpenSMILES-0.11.6/lib/Chemistry/OpenSMILES/0000775000200400020040000000000014753573665021622 5ustar andriusandriusChemistry-OpenSMILES-0.11.6/lib/Chemistry/OpenSMILES/Parser.yp0000644000200400020040000004017214753573665023432 0ustar andriusandrius# Header section %{ use warnings; use 5.0100; use Chemistry::Elements; use Chemistry::OpenSMILES qw( %bond_symbol_to_order %normal_valence is_aromatic is_chiral toggle_cistrans ); use Graph::Undirected; use List::Util qw( any first sum0 ); %} %% # Rules section # The top-level 'filter' rule smiles: chain ; chain: atom { my $g = Graph::Undirected->new( refvertexed => 1 ); $g->add_vertex( $_[1] ); push @{$_[0]->{USER}{GRAPHS}}, $g; $_[1]->{graph} = $g; $_[1]->{index} = @{$_[0]->{USER}{GRAPHS}}-1; $_[1]->{first_of_chain} = 1; return { first => $_[1], last => $_[1] }; } | chain atom { $_[2]->{graph} = $_[1]->{last}{graph}; $_[2]->{index} = $_[1]->{last}{index}; $_[2]->{graph}->add_edge( $_[1]->{last}, $_[2] ); if( is_aromatic $_[1]->{last} && is_aromatic $_[2] ) { $_[2]->{graph}->set_edge_attribute( $_[1]->{last}, $_[2], 'bond', ':' ); } delete $_[2]->{first_of_chain}; _push_chirality_neighbour( $_[1]->{last}, $_[2] ); _push_chirality_neighbour( $_[2], $_[1]->{last} ); $_[1]->{last} = $_[2]; return $_[1]; } | chain bond atom { $_[3]->{graph} = $_[1]->{last}{graph}; $_[3]->{index} = $_[1]->{last}{index}; if( $_[2] ne '-' ) { $_[3]->{graph}->set_edge_attribute( $_[1]->{last}, $_[3], 'bond', $_[2] ); } else { $_[3]->{graph}->add_edge( $_[1]->{last}, $_[3] ); } delete $_[3]->{first_of_chain}; _push_chirality_neighbour( $_[1]->{last}, $_[3] ); _push_chirality_neighbour( $_[3], $_[1]->{last} ); $_[1]->{last} = $_[3]; return $_[1]; } | chain '.' atom { my $g = Graph::Undirected->new( refvertexed => 1 ); $g->add_vertex( $_[3] ); push @{$_[0]->{USER}{GRAPHS}}, $g; $_[3]->{graph} = $g; $_[3]->{index} = @{$_[0]->{USER}{GRAPHS}}-1; $_[3]->{first_of_chain} = 1; return { first => $_[3], last => $_[3] }; } | chain '(' chain ')' { if( $_[1]->{last}{index} != $_[3]->{first}{index} ) { $_[0]->_merge_graphs( $_[1]->{last}{index}, $_[3]->{first}{index} ); } $_[1]->{last}{graph}->add_edge( $_[1]->{last}, $_[3]->{first} ); if( is_aromatic $_[1]->{last} && is_aromatic $_[3]->{first} ) { $_[1]->{last}{graph}->set_edge_attribute( $_[1]->{last}, $_[3]->{first}, 'bond', ':' ); } delete $_[3]->{first}{first_of_chain}; _push_chirality_neighbour( $_[1]->{last}, $_[3]->{first} ); _unshift_chirality_neighbour( $_[3]->{first}, $_[1]->{last} ); return $_[1]; } | chain '(' bond chain ')' { if( $_[1]->{last}{index} != $_[4]->{first}{index} ) { $_[0]->_merge_graphs( $_[1]->{last}{index}, $_[4]->{first}{index} ); } if( $_[3] ne '-' ) { $_[1]->{last}{graph}->set_edge_attribute( $_[1]->{last}, $_[4]->{first}, 'bond', $_[3] ); } else { $_[1]->{last}{graph}->add_edge( $_[1]->{last}, $_[4]->{first} ); } delete $_[4]->{first}{first_of_chain}; _push_chirality_neighbour( $_[1]->{last}, $_[4]->{first} ); _unshift_chirality_neighbour( $_[4]->{first}, $_[1]->{last} ); return $_[1]; } | chain '(' '.' chain ')' # According to the specification of OpenSMILES, ring bonds are # allowed only before the branch enumeration. However, I think this # is too strict. | chain ringbond { $_[0]->_add_ring_bond( $_[1]->{last}, $_[2] ); return $_[1]; } | chain bond ringbond { $_[0]->_add_ring_bond( $_[1]->{last}, $_[3], $_[2] ); return $_[1]; } ; bond: '-' | '=' | '#' | '$' | ':' | '/' | '\\' ; %% # Footer section sub _Error { my( $self ) = @_; close $self->{USER}{FILEIN} if $self->{USER}{FILEIN}; if( ${$self->{TOKEN}} eq '' && grep { defined $_ && !ref $_ && $_ eq '(' } map { $_->[1] } @{$self->{STACK}} ) { die "$0: syntax error: missing closing parenthesis.\n"; } if( ${$self->{TOKEN}} eq ')' ) { die "$0: syntax error: unbalanced parentheses.\n"; } my $msg = "$0: syntax error at position $self->{USER}{CHARNO}"; if( $self->YYData->{INPUT} ) { $self->YYData->{INPUT} =~ s/\n$//; die "$msg: '" . $self->YYData->{INPUT} . "'.\n"; } else { die "$msg.\n"; } } sub _Lexer { my( $self ) = @_; # If the line is empty and the input is originating from the file, # another line is read. if( !$self->YYData->{INPUT} && $self->{USER}{FILEIN} ) { my $filein = $self->{USER}{FILEIN}; $self->YYData->{INPUT} = <$filein>; $self->{USER}{CHARNO} = 0; } if( $self->YYData->{INPUT} =~ s/^(\s+)// ) { $self->{USER}{CHARNO} += length $1; } my $hcount_re = 'H[0-9]?'; if( defined $self->{USER}{OPTIONS}{max_hydrogen_count_digits} ) { $hcount_re = sprintf 'H[0-9]{0,%d}', $self->{USER}{OPTIONS}{max_hydrogen_count_digits}; } # Bracket atoms if( $self->YYData->{INPUT} =~ s/^\[ (?[0-9]+)? (?[A-Za-z][a-z]?|\*) (?@( (TH|AL)[12] | SP [123] | (TB|OH)[0-9]{1,2} | @? ))? (? $hcount_re)? (?--|\+\+|[-+][0-9]{0,2})? (:(?[0-9]+))? \]//x ) { my $atom = { %+, number => $self->{USER}{ATOMNO} }; $self->{USER}{ATOMNO} ++; $self->{USER}{CHARNO} += length $&; # Check for existence of the seen element # Due to https://github.com/briandfoy/chemistry-elements/issues/16, Chemistry::Elements < 1.079 has 'Ha' instead of 'Db' if( $atom->{symbol} eq '*' || $atom->{symbol} eq 'Db' ) { # OK } elsif( $atom->{symbol} eq lc $atom->{symbol} && $atom->{symbol} !~ /^(as|se|[bcnops])$/ ) { die "aromatic chemical element '$atom->{symbol}' is not allowed\n"; } elsif( $atom->{symbol} eq 'Ha' || !Chemistry::Elements->new( $atom->{symbol} ) ) { die "chemical element with symbol '$atom->{symbol}' is unknown\n"; } if( $atom->{charge} ) { $atom->{charge} =~ s/^([-+])$/${1}1/; $atom->{charge} =~ s/^([-+])\1$/${1}2/; $atom->{charge} = int $atom->{charge}; } if( $atom->{hcount} ) { $atom->{hcount} =~ s/^H//; $atom->{hcount} = $atom->{hcount} ? int $atom->{hcount} : 1; } else { $atom->{hcount} = 0; } if( $atom->{isotope} ) { $atom->{isotope} = int $atom->{isotope}; } # Atom class is an arbitrary number, 0 by default $atom->{class} = exists $atom->{class} ? int $atom->{class} : 0; return ( 'atom', $atom ); } # Bracketless atoms if( $self->YYData->{INPUT} =~ s/^(Br|Cl|[BCINOPSFbcnops*])// ) { my $atom = { symbol => $1, class => 0, number => $self->{USER}{ATOMNO} }; $self->{USER}{ATOMNO} ++; $self->{USER}{CHARNO} += length $&; return ( 'atom', $atom ); } # Ring bonds if( $self->YYData->{INPUT} =~ s/^%([0-9]{2})// || $self->YYData->{INPUT} =~ s/^([0-9])// ) { $self->{USER}{CHARNO} += length $&; return ( 'ringbond', int $1 ); } my $char = substr( $self->YYData->{INPUT}, 0, 1 ); if( $char ne '' ) { $self->YYData->{INPUT} = substr( $self->YYData->{INPUT}, 1 ); } $self->{USER}{CHARNO} ++; return( $char, $char ); } sub parse { my( $self, $string, $options ) = @_; $options = {} unless $options; $self->YYData->{INPUT} = $string; $self->{USER}{GRAPHS} = []; $self->{USER}{RINGBONDS} = {}; $self->{USER}{ATOMNO} = 0; $self->{USER}{CHARNO} = 0; $self->{USER}{OPTIONS} = $options; $self->YYParse( yylex => \&_Lexer, yyerror => \&_Error, yydebug => $options->{debug} ); if( scalar keys %{$self->{USER}{RINGBONDS}} ) { die "$0: unclosed ring bond(s) detected: " . join( ', ', sort { $a <=> $b } keys %{$self->{USER}{RINGBONDS}} ) . ".\n"; } my @graphs = grep { defined } @{$self->{USER}{GRAPHS}}; for my $graph (@graphs) { for my $atom (sort { $a->{number} <=> $b->{number} } $graph->vertices) { delete $atom->{graph}; delete $atom->{index}; if( !$options->{raw} ) { # Promote implicit hydrogen atoms into explicit ones if( !exists $atom->{hcount} ) { next if !exists $normal_valence{$atom->{symbol}}; my $degree = sum0 map { $_ ne ':' && exists $bond_symbol_to_order{$_} ? $bond_symbol_to_order{$_} : 1 } map { $graph->has_edge_attribute( $atom, $_, 'bond' ) ? $graph->get_edge_attribute( $atom, $_, 'bond' ) : '-' } $graph->neighbours( $atom ); my $valence = first { $degree <= $_ } @{$normal_valence{$atom->{symbol}}}; next unless defined $valence; $atom->{hcount} = $valence - $degree; } for (1..$atom->{hcount}) { my $hydrogen = { symbol => 'H', class => 0, number => $self->{USER}{ATOMNO} }; $graph->add_edge( $atom, $hydrogen ); $self->{USER}{ATOMNO} ++; if( $atom->{first_of_chain} ) { _unshift_chirality_neighbour( $atom, $hydrogen ); } else { _push_chirality_neighbour( $atom, $hydrogen ); } } delete $atom->{hcount}; # Unify the representation of chirality if( is_chiral $atom ) { if( $atom->{chirality} =~ /^@@?$/ ) { if( $graph->degree( $atom ) == 2 ) { $atom->{chirality} =~ s/@+/'@AL' . length $&/e; } elsif( $graph->degree( $atom ) == 5 ) { $atom->{chirality} =~ s/@+/'@TB' . length $&/e; } elsif( $graph->degree( $atom ) == 6 ) { $atom->{chirality} =~ s/@+/'@OH' . length $&/e; } } $atom->{chirality} =~ s/^\@TH1$/@/; $atom->{chirality} =~ s/^\@TH2$/@@/; } # Adjust chirality for centers having lone pairs if( is_chiral $atom && $atom->{first_of_chain} && $atom->{chirality} =~ /^@@?$/ && $atom->{chirality_neighbours} && scalar @{$atom->{chirality_neighbours}} == 3 ) { $atom->{chirality} = $atom->{chirality} eq '@' ? '@@' : '@'; } } delete $atom->{first_of_chain}; } } return @graphs; } sub _add_ring_bond { my( $self, $atom, $ring_bond, $bond ) = @_; if( $self->{USER}{RINGBONDS}{$ring_bond} ) { $self->_merge_graphs( $self->{USER}{RINGBONDS}{$ring_bond}{atom}{index}, $atom->{index} ); if( $bond && $self->{USER}{RINGBONDS}{$ring_bond}{bond} && (($bond !~ /^[\\\/]$/ && $bond ne $self->{USER}{RINGBONDS}{$ring_bond}{bond}) || ($bond eq '\\' && $self->{USER}{RINGBONDS}{$ring_bond}{bond} ne '/') || ($bond eq '/' && $self->{USER}{RINGBONDS}{$ring_bond}{bond} ne '\\')) ) { die "$0: ring bond types for ring bond $ring_bond do not match.\n"; } $bond = first { defined } ( $self->{USER}{RINGBONDS}{$ring_bond}{bond}, $bond ); if( $bond && $bond =~ /^[\\\/]$/ && !defined $self->{USER}{RINGBONDS}{$ring_bond}{bond} ) { # If cis/trans marker is not specified when cis/trans bond is # seen first, it has to be inverted: $bond = toggle_cistrans $bond; } my $ring_atom = $self->{USER}{RINGBONDS}{$ring_bond}{atom}; die "atom cannot be bonded to itself\n" if $atom == $ring_atom; if( !$bond && is_aromatic $ring_atom && is_aromatic $atom ) { $bond = ':'; } if( $bond && $bond ne '-' ) { $atom->{graph}->set_edge_attribute( $ring_atom, $atom, 'bond', $bond ); } else { $atom->{graph}->add_edge( $ring_atom, $atom ); } delete $self->{USER}{RINGBONDS}{$ring_bond}; if( is_chiral $ring_atom && $ring_atom->{chirality_neighbours} ) { my $pos = first { !ref $ring_atom->{chirality_neighbours}[$_] && $ring_atom->{chirality_neighbours}[$_] == $ring_bond } 0..$#{$ring_atom->{chirality_neighbours}}; $ring_atom->{chirality_neighbours}[$pos] = $atom if defined $pos; } _push_chirality_neighbour( $atom, $ring_atom ); } else { $self->{USER}{RINGBONDS}{$ring_bond} = { atom => $atom, $bond ? ( bond => $bond ) : () }; # Record a placeholder for later addition of real chirality # neighbour, which will be identified by the ring bond number _push_chirality_neighbour( $atom, $ring_bond ); } } sub _merge_graphs { my( $self, $index1, $index2 ) = @_; return if $index1 == $index2; my $g1 = $self->{USER}{GRAPHS}[$index1]; my $g2 = $self->{USER}{GRAPHS}[$index2]; for ($g2->vertices) { $_->{graph} = $g1; $_->{index} = $index1; } $g1->add_vertices( $g2->vertices ); for ($g2->edges) { my $attributes = $g2->get_edge_attributes( @$_ ); if( $attributes ) { $g1->set_edge_attributes( @$_, $attributes ); } else { $g1->add_edge( @$_ ); } } $self->{USER}{GRAPHS}[$index2] = undef; } sub _push_chirality_neighbour { my( $atom1, $atom2 ) = @_; return unless is_chiral $atom1; push @{$atom1->{chirality_neighbours}}, $atom2; } sub _unshift_chirality_neighbour { my( $atom1, $atom2 ) = @_; return unless is_chiral $atom1; unshift @{$atom1->{chirality_neighbours}}, $atom2; } 1; Chemistry-OpenSMILES-0.11.6/lib/Chemistry/OpenSMILES/Stereo/0000775000200400020040000000000014753573665023063 5ustar andriusandriusChemistry-OpenSMILES-0.11.6/lib/Chemistry/OpenSMILES/Stereo/Tables.pm0000644000200400020040000000526114753573665024635 0ustar andriusandriuspackage Chemistry::OpenSMILES::Stereo::Tables; # ABSTRACT: Stereochemistry tables our $VERSION = '0.11.6'; # VERSION use strict; use warnings; require Exporter; our @ISA = qw( Exporter ); our @EXPORT_OK = qw( @OH @TB ); our @TB = ( { axis => [ 1, 5 ], order => '@' }, { axis => [ 1, 5 ], order => '@@' }, { axis => [ 1, 4 ], order => '@' }, { axis => [ 1, 4 ], order => '@@' }, { axis => [ 1, 3 ], order => '@' }, { axis => [ 1, 3 ], order => '@@' }, { axis => [ 1, 2 ], order => '@' }, { axis => [ 1, 2 ], order => '@@' }, { axis => [ 2, 5 ], order => '@' }, { axis => [ 2, 4 ], order => '@' }, { axis => [ 2, 5 ], order => '@@' }, { axis => [ 2, 4 ], order => '@@' }, { axis => [ 2, 3 ], order => '@' }, { axis => [ 2, 3 ], order => '@@' }, { axis => [ 3, 5 ], order => '@' }, { axis => [ 3, 4 ], order => '@' }, { axis => [ 4, 5 ], order => '@' }, { axis => [ 4, 5 ], order => '@@' }, { axis => [ 3, 4 ], order => '@@' }, { axis => [ 3, 5 ], order => '@@' }, ); our @OH = ( { shape => 'U', axis => [ 1, 6 ], order => '@' }, { shape => 'U', axis => [ 1, 6 ], order => '@@' }, { shape => 'U', axis => [ 1, 5 ], order => '@' }, { shape => 'Z', axis => [ 1, 6 ], order => '@' }, { shape => 'Z', axis => [ 1, 5 ], order => '@' }, { shape => 'U', axis => [ 1, 4 ], order => '@' }, { shape => 'Z', axis => [ 1, 4 ], order => '@' }, { shape => '4', axis => [ 1, 6 ], order => '@@' }, { shape => '4', axis => [ 1, 5 ], order => '@@' }, { shape => '4', axis => [ 1, 6 ], order => '@' }, { shape => '4', axis => [ 1, 5 ], order => '@' }, { shape => '4', axis => [ 1, 4 ], order => '@@' }, { shape => '4', axis => [ 1, 4 ], order => '@' }, { shape => 'Z', axis => [ 1, 6 ], order => '@@' }, { shape => 'Z', axis => [ 1, 5 ], order => '@@' }, { shape => 'U', axis => [ 1, 5 ], order => '@@' }, { shape => 'Z', axis => [ 1, 4 ], order => '@@' }, { shape => 'U', axis => [ 1, 4 ], order => '@@' }, { shape => 'U', axis => [ 1, 3 ], order => '@' }, { shape => 'Z', axis => [ 1, 3 ], order => '@' }, { shape => '4', axis => [ 1, 3 ], order => '@@' }, { shape => '4', axis => [ 1, 3 ], order => '@' }, { shape => 'Z', axis => [ 1, 3 ], order => '@@' }, { shape => 'U', axis => [ 1, 3 ], order => '@@' }, { shape => 'U', axis => [ 1, 2 ], order => '@' }, { shape => 'Z', axis => [ 1, 2 ], order => '@' }, { shape => '4', axis => [ 1, 2 ], order => '@@' }, { shape => '4', axis => [ 1, 2 ], order => '@' }, { shape => 'Z', axis => [ 1, 2 ], order => '@@' }, { shape => 'U', axis => [ 1, 2 ], order => '@@' }, ); 1; Chemistry-OpenSMILES-0.11.6/lib/Chemistry/OpenSMILES/Aromaticity.pm0000644000200400020040000001426714753573665024455 0ustar andriusandriuspackage Chemistry::OpenSMILES::Aromaticity; use strict; use warnings; # ABSTRACT: Aromaticity handling routines our $VERSION = '0.11.6'; # VERSION use Chemistry::OpenSMILES qw( is_aromatic is_aromatic_bond is_double_bond is_single_bond ); use Graph::Traversal::DFS; use List::Util qw( all first ); =head1 NAME Chemistry::OpenSMILES::Aromaticity - Aromaticity handling routines =head1 DESCRIPTION Chemistry::OpenSMILES::Aromaticity encodes some aromaticity handling subroutines for aromatisation and kekulisation. Both implementations are experimental, handle only some specific cases and are neither stable nor bug-free, thus should be used with caution. =cut require Exporter; our @ISA = qw( Exporter ); our @EXPORT_OK = qw( aromatise electron_cycles kekulise ); =head1 METHODS =over 4 =item aromatise( $moiety ) Mark electron cycles as aromatic. =cut sub aromatise { my( $moiety ) = @_; my @electron_cycles = electron_cycles( $moiety ); for my $cycle (@electron_cycles) { for my $i (0..$#$cycle) { # Set bond to aromatic $moiety->set_edge_attribute( $cycle->[$i], $cycle->[($i + 1) % scalar @$cycle], 'bond', ':' ); # Set atom to aromatic if( $cycle->[$i]{symbol} =~ /^([BCNOPS]|Se|As)$/ ) { $cycle->[$i]{symbol} = lcfirst $cycle->[$i]{symbol}; } } } } =item kekulise( $moiety, $order_sub ) Find nonfused even-length aromatic cycles consisting only of B, C, N, P, S and mark them with alternating single and double bonds. Subroutine as well accepts an optional subroutine reference C<$order_sub>, providing external order for atoms. This is needed to stabilise the algorithm, as otherwise the outcomes of bond assignment may turn out different. C<$order_sub> is called with an atom as C<$_[0]> and is expected to return a value providing a distinct order indication for every atom. These can be any scalar values, comparable using Perl's C operator. If C<$order_sub> is not given, initial atom order in input is consulted. =cut sub kekulise { my( $moiety, $order_sub ) = @_; $order_sub = sub { $_[0]->{number} } unless $order_sub; my $aromatic_only = $moiety->copy_graph; $aromatic_only->delete_vertices( grep { !is_aromatic $_ } $aromatic_only->vertices ); $aromatic_only->delete_edges( map { @$_ } grep { !is_aromatic_bond( $moiety, @$_ ) } $aromatic_only->edges ); for my $component ($aromatic_only->connected_components) { # Taking only simple even-length cycles into consideration next unless all { $aromatic_only->degree( $_ ) == 2 } @$component; next unless all { $moiety->degree( $_ ) <= 3 } @$component; next unless all { $_->{symbol} =~ /^[BCNPS]$/i } @$component; next if @$component % 2; my( $first ) = sort { $order_sub->($a) cmp $order_sub->($b) } @$component; my( $second ) = sort { $order_sub->($a) cmp $order_sub->($b) } $aromatic_only->neighbours( $first ); for my $i (0..$#$component) { $first->{symbol} = ucfirst $first->{symbol}; if( $i % 2 ) { $moiety->set_edge_attribute( $first, $second, 'bond', '=' ); } else { $moiety->delete_edge_attribute( $first, $second, 'bond' ); } ( $first, $second ) = ( $second, first { $_ ne $first } $aromatic_only->neighbours( $second ) ); } } } =item electron_cycles( $moiety ) Find electron cycles according to "Finding Electron Cycles" algorithm from L. Use with caution: the implementation is experimental. =cut sub electron_cycles { my( $moiety ) = @_; my @cycles; for my $start ($moiety->vertices) { my %seen; my %prev; my $operations = { start => sub { $start }, pre => sub { $seen{$_[0]} = 1 }, pre_edge => sub { my( $u, $v ) = @_; ( $u, $v ) = ( $v, $u ) if $seen{$v}; $prev{$v} = $u; }, non_tree_edge => sub { my( $u, $v ) = @_; if( $u == $start || $v == $start ) { ( $u, $v ) = ( $v, $u ) if $v == $start; my $current = $v; my $prev_bond_is_single; my $cycle_is_alterating = 1; my @cycle = ( $u ); while( $prev{$current} ) { if( ( !defined $prev_bond_is_single && ( is_single_bond( $moiety, $current, $prev{$current} ) || is_double_bond( $moiety, $current, $prev{$current} ) ) ) || ( $prev_bond_is_single && is_double_bond( $moiety, $current, $prev{$current} ) ) || ( !$prev_bond_is_single && is_single_bond( $moiety, $current, $prev{$current} ) ) ) { # Logic is inverted here as $prev_bond_is_single is # inverted after the conditional. $prev_bond_is_single = !is_single_bond( $moiety, $current, $prev{$current} ); push @cycle, $current; $current = $prev{$current}; } else { $cycle_is_alterating = 0; last; } last unless $cycle_is_alterating; $prev_bond_is_single = 1 - $prev_bond_is_single; } push @cycles, \@cycle if $cycle_is_alterating; } }, }; my $traversal = Graph::Traversal::DFS->new( $moiety, %$operations ); $traversal->dfs; } my %unique; for (@cycles) { $unique{join '', sort @$_} = $_; } return values %unique; } =back =cut 1; Chemistry-OpenSMILES-0.11.6/lib/Chemistry/OpenSMILES/Writer.pm0000644000200400020040000004704214753573665023441 0ustar andriusandriuspackage Chemistry::OpenSMILES::Writer; # ABSTRACT: OpenSMILES format writer our $VERSION = '0.11.6'; # VERSION use strict; use warnings; use Chemistry::OpenSMILES qw( %bond_symbol_to_order %normal_valence is_aromatic is_chiral toggle_cistrans valence ); use Chemistry::OpenSMILES::Parser; use Chemistry::OpenSMILES::Stereo::Tables qw( @OH @TB ); use Graph::Traversal::DFS; use List::Util qw( all any first min none sum0 uniq ); require Exporter; our @ISA = qw( Exporter ); our @EXPORT_OK = qw( write_SMILES ); my %shape_to_SP = ( 'U' => '@SP1', '4' => '@SP2', 'Z' => '@SP3' ); my %SP_to_shape = reverse %shape_to_SP; # write_SMILES() does not necessary respect the order subroutine: if performs DFS guided by the requested order. # Thus before calling write_SMILES(), the exact post-order is not known. # Only pre-order is known, thus relative properties, such as cis/trans markers, have to be adjusted to pre-order. # Thus order-dependent markers have to be adjusted to pre-order. sub write_SMILES { my( $what, $options ) = @_; # Backwards compatibility with the old API where second parameter was # a subroutine reference for ordering: my $order_sub = defined $options && ref $options eq 'CODE' ? $options : \&_order; $options = {} unless defined $options && ref $options eq 'HASH'; $order_sub = $options->{order_sub} if $options->{order_sub}; my $raw = $options->{raw}; # Subroutine will also accept and properly represent a single atom: return _pre_vertex( $what, undef, { raw => $raw } ) if ref $what eq 'HASH'; my @moieties = ref $what eq 'ARRAY' ? @$what : ( $what ); my @components; for my $graph (@moieties) { my @symbols; my %vertex_symbols; my $nrings = 0; my %seen_rings; my @chiral; my %discovered_from; my $rings = {}; my $operations = { tree_edge => sub { my( $seen, $unseen, $self ) = @_; if( $vertex_symbols{$unseen} ) { ( $seen, $unseen ) = ( $unseen, $seen ); } push @symbols, _tree_edge( $seen, $unseen, $self ); $discovered_from{$unseen} = $seen }, non_tree_edge => sub { my @sorted = sort { $vertex_symbols{$a} <=> $vertex_symbols{$b} } @_[0..1]; $rings->{$vertex_symbols{$_[0]}} {$vertex_symbols{$_[1]}} = $rings->{$vertex_symbols{$_[1]}} {$vertex_symbols{$_[0]}} = _depict_bond( @sorted, $graph ) }, pre => sub { my( $vertex, $dfs ) = @_; push @chiral, $vertex if is_chiral $vertex; push @symbols, _pre_vertex( $vertex, $graph, { omit_chirality => 1, raw => $raw } ); $vertex_symbols{$vertex} = $#symbols }, post => sub { push @symbols, ')' }, next_root => undef, }; $operations->{first_root} = sub { $order_sub->( $_[1], $_[0]->graph ) }; $operations->{next_successor} = sub { $order_sub->( $_[1], $_[0]->graph ) }; my $traversal = Graph::Traversal::DFS->new( $graph, %$operations ); $traversal->dfs; if( scalar keys %vertex_symbols != scalar $graph->vertices ) { warn scalar( $graph->vertices ) - scalar( keys %vertex_symbols ) . ' unreachable atom(s) detected in moiety' . "\n"; } next unless @symbols; pop @symbols; # Dealing with chirality for my $atom (@chiral) { next unless $atom->{chirality} =~ /^@(@?|SP[123]|TB1?[1-9]|TB20|OH[1-9]|OH[12][0-9]|OH30)$/; my @neighbours = $graph->neighbours($atom); my $has_lone_pair; if( $atom->{chirality} =~ /^@(@?|SP[123])$/ ) { if( scalar @neighbours < 3 || scalar @neighbours > 4 ) { warn "chirality '$atom->{chirality}' observed for atom " . 'with ' . scalar @neighbours . ' neighbours, can only ' . 'process tetrahedral chiral or square planar centers ' . 'with possible lone pairs' . "\n"; next; } $has_lone_pair = @neighbours == 3; } if( $atom->{chirality} =~ /^\@TB..?$/ ) { if( scalar @neighbours < 4 || scalar @neighbours > 5 ) { warn "chirality '$atom->{chirality}' observed for atom " . 'with ' . scalar @neighbours . ' neighbours, can only ' . 'process trigonal bipyramidal centers ' . 'with possible lone pairs' . "\n"; next; } $has_lone_pair = @neighbours == 4; } if( $atom->{chirality} =~ /^\@OH..?$/ ) { if( scalar @neighbours < 5 || scalar @neighbours > 6 ) { warn "chirality '$atom->{chirality}' observed for atom " . 'with ' . scalar @neighbours . ' neighbours, can only ' . 'process octahedral centers ' . 'with possible lone pairs' . "\n"; next; } $has_lone_pair = @neighbours == 5; } my $chirality_now = $atom->{chirality}; if( $atom->{chirality_neighbours} ) { if( scalar @neighbours != scalar @{$atom->{chirality_neighbours}} ) { warn 'number of neighbours does not match the length ' . "of 'chirality_neighbours' array, cannot process " . 'such chiral centers' . "\n"; next; } my %indices; for (0..$#{$atom->{chirality_neighbours}}) { my $pos = $_; if( $has_lone_pair && $_ != 0 ) { # Lone pair is always second in the chiral neighbours array $pos++; } $indices{$vertex_symbols{$atom->{chirality_neighbours}[$_]}} = $pos; } my @order_new; # In the newly established order, the atom from which this one # is discovered (left hand side) will be the first, if any if( $discovered_from{$atom} ) { push @order_new, $indices{$vertex_symbols{$discovered_from{$atom}}}; } # Second, there will be ring bonds as they are added before all of the neighbours if( $rings->{$vertex_symbols{$atom}} ) { push @order_new, map { $indices{$_} } sort { $a <=> $b } keys %{$rings->{$vertex_symbols{$atom}}}; } # Finally, all neighbours are added, uniq will remove duplicates push @order_new, map { $indices{$_} } sort { $a <=> $b } map { $vertex_symbols{$_} } @neighbours; @order_new = uniq @order_new; if( $has_lone_pair ) { # Accommodate the lone pair if( $discovered_from{$atom} ) { @order_new = ( $order_new[0], 1, @order_new[1..$#order_new] ); } else { unshift @order_new, 1; } } if( $atom->{chirality} =~ /^@@?$/ ) { # Tetragonal centers if( join( '', _permutation_order( @order_new ) ) ne '0123' ) { $chirality_now = $chirality_now eq '@' ? '@@' : '@'; } } elsif( $atom->{chirality} =~ /^\@SP[123]$/ ) { # Square planar centers $chirality_now = _square_planar_chirality( @order_new, $chirality_now ); } elsif( $atom->{chirality} =~ /^\@TB..?$/ ) { # Trigonal bipyramidal centers $chirality_now = _trigonal_bipyramidal_chirality( @order_new, $chirality_now ); } else { # Octahedral centers $chirality_now = _octahedral_chirality( @order_new, $chirality_now ); } } my $parser = Chemistry::OpenSMILES::Parser->new; my( $graph_reparsed ) = $parser->parse( $symbols[$vertex_symbols{$atom}], { raw => 1 } ); my( $atom_reparsed ) = $graph_reparsed->vertices; $atom_reparsed->{chirality} = $chirality_now; $symbols[$vertex_symbols{$atom}] = write_SMILES( $atom_reparsed ); } # Adding ring numbers my @ring_ids = ( 1..99, 0 ); my @ring_ends; for my $i (0..$#symbols) { if( $rings->{$i} ) { for my $j (sort { $a <=> $b } keys %{$rings->{$i}}) { next if $i > $j; if( !@ring_ids ) { # All 100 rings are open now. # There is no other solution but to terminate the program. die 'cannot represent more than 100 open ring bonds' . "\n"; } $symbols[$i] .= $rings->{$i}{$j} . ($ring_ids[0] < 10 ? '' : '%') . $ring_ids[0]; $symbols[$j] .= ($rings->{$i}{$j} eq '/' ? '\\' : $rings->{$i}{$j} eq '\\' ? '/' : $rings->{$i}{$j}) . ($ring_ids[0] < 10 ? '' : '%') . $ring_ids[0]; push @{$ring_ends[$j]}, shift @ring_ids; } } if( $ring_ends[$i] ) { # Ring bond '0' must stay in the end @ring_ids = sort { ($a == 0) - ($b == 0) || $a <=> $b } (@{$ring_ends[$i]}, @ring_ids); } } push @components, join '', @symbols; } return join '.', @components; } # DEPRECATED sub write { &write_SMILES } sub _tree_edge { my( $u, $v, $self ) = @_; return '(' . _depict_bond( $u, $v, $self->graph ); } sub _pre_vertex { my( $vertex, $graph, $options ) = @_; $options = {} unless $options; my( $omit_chirality, $raw ) = ( $options->{omit_chirality}, $options->{raw} ); my $atom = $vertex->{symbol}; my $is_simple = $atom =~ /^[bcnosp]$/i || $atom =~ /^(F|Cl|Br|I|\*)$/; if( exists $vertex->{isotope} ) { $atom = $vertex->{isotope} . $atom; $is_simple = 0; } if( is_chiral $vertex && !$omit_chirality ) { $atom .= $vertex->{chirality}; $is_simple = 0; } if( $vertex->{hcount} ) { # if non-zero $atom .= 'H' . ($vertex->{hcount} == 1 ? '' : $vertex->{hcount}); $is_simple = 0; } $is_simple = 0 if $raw && exists $vertex->{hcount}; if( $vertex->{charge} ) { # if non-zero $atom .= ($vertex->{charge} > 0 ? '+' : '') . $vertex->{charge}; $atom =~ s/([-+])1$/$1/; $is_simple = 0; } if( $vertex->{class} ) { # if non-zero $atom .= ':' . $vertex->{class}; $is_simple = 0; } # Decide whether to put atom in square brackets because of unusual valence if( $is_simple && $graph && !$raw && $normal_valence{ucfirst $atom} ) { my $valence = valence( $graph, $vertex ); $is_simple = any { $_ == $valence } @{$normal_valence{ucfirst $atom}}; } return $is_simple ? $atom : "[$atom]"; } # _depict_bond() gets vertices in order of their appearance in the post-order. # It flips '/' <=> '\' if post-order is opposite from pre-order. sub _depict_bond { my( $u, $v, $graph ) = @_; if( !$graph->has_edge_attribute( $u, $v, 'bond' ) ) { return is_aromatic $u && is_aromatic $v ? '-' : ''; } my $bond = $graph->get_edge_attribute( $u, $v, 'bond' ); return $bond if $bond ne '/' && $bond ne '\\'; return $bond if $u->{number} < $v->{number}; return toggle_cistrans $bond; } # Reorder a permutation of elements 0, 1, 2 and 3 by taking an element # and moving it two places either forward or backward in the line. This # subroutine is used to check whether a sign change of tetragonal # chirality is required or not. sub _permutation_order { # Safeguard against endless cycles due to undefined values if( (scalar @_ != 4) || (any { !defined || !/^[0-3]$/ } @_) || (join( ',', sort @_ ) ne '0,1,2,3') ) { warn '_permutation_order() accepts only permutations of numbers ' . "'0', '1', '2' and '3', unexpected input received"; return 0..3; # Return original order } while( $_[2] == 0 || $_[3] == 0 ) { @_ = ( $_[0], @_[2..3], $_[1] ); } if( $_[0] != 0 ) { @_ = ( @_[1..2], $_[0], $_[3] ); } while( $_[1] != 1 ) { @_[1..3] = ( @_[2..3], $_[1] ); } return @_; } sub _square_planar_chirality { my $chirality = pop @_; my @source = 0..3; my @target = @_; if( join( ',', sort @_ ) ne '0,1,2,3' ) { die '_square_planar_chirality() accepts only permutations of ' . "numbers '0', '1', '2' and '3', unexpected input received\n"; } # Rotations until 0 is first while( $source[0] != $target[0] ) { push @source, shift @source; my %tab = ( '@SP1' => '@SP1', '@SP2' => '@SP3', '@SP3' => '@SP2' ); $chirality = $tab{$chirality}; } if( $source[3] == $target[1] ) { # Swap the right side ( $source[2], $source[3] ) = ( $source[3], $source[2] ); my %tab = ( '@SP1' => '@SP3', '@SP2' => '@SP2', '@SP3' => '@SP1' ); $chirality = $tab{$chirality}; } if( $source[2] == $target[1] ) { # Swap the center ( $source[1], $source[2] ) = ( $source[2], $source[1] ); my %tab = ( '@SP1' => '@SP2', '@SP2' => '@SP1', '@SP3' => '@SP3' ); $chirality = $tab{$chirality}; } if( $source[3] == $target[2] ) { # Swap the right side ( $source[2], $source[3] ) = ( $source[3], $source[2] ); my %tab = ( '@SP1' => '@SP3', '@SP2' => '@SP2', '@SP3' => '@SP1' ); $chirality = $tab{$chirality}; } return $chirality; } sub _trigonal_bipyramidal_chirality { my $chirality = pop @_; my @target = @_; if( join( ',', sort @target ) ne '0,1,2,3,4' ) { die '_trigonal_bipyramidal_chirality() accepts only permutations of ' . "numbers '0', '1', '2', '3' and '4', unexpected input received\n"; } $chirality =~ s/^\@TB//; $chirality = int $chirality; my $TB = $TB[$chirality - 1]; # First on, decode the source. # Axis will stay on @axis, and sides will be stored on @sides my @axis = map { $_ - 1 } @{$TB->{axis}}; my @sides = grep { $_ != $axis[0] && $_ != $axis[1] } 0..4; # Find the new location of the axis, remove it from @target my @axis_location = ( ( first { $target[$_] == $axis[0] } 0..4 ), ( first { $target[$_] == $axis[1] } 0..4 ) ); @target = grep { $_ != $axis[0] && $_ != $axis[1] } @target; # Invert the axis if needed if( $axis_location[0] > $axis_location[1] ) { @axis_location = reverse @axis_location; @target = reverse @target; } # Cycle the sides clockwise until the first is aligned while( $sides[0] != $target[0] ) { push @sides, shift @sides; } my $order = $TB->{order}; $order = $order eq '@' ? '@@' : '@' unless $sides[1] == $target[1]; $chirality = 1 + first { $TB[$_]->{order} eq $order && $TB[$_]->{axis}[0] == $axis_location[0] + 1 && $TB[$_]->{axis}[1] == $axis_location[1] + 1 } 0..$#TB; return '@TB' . $chirality; } sub _octahedral_chirality { my $chirality = pop @_; my @target = @_; if( join( ',', sort @target ) ne '0,1,2,3,4,5' ) { die '_octahedral_chirality() accepts only permutations of ' . "numbers '0', '1', '2', '3', '4' and '5, unexpected input received\n"; } $chirality =~ s/^\@OH//; $chirality = int $chirality; # First on, decode the source. # Axis will stay on @axis, and sides will be stored on @sides in contiguous clockwise order. my @axis = map { $_ - 1 } @{$OH[$chirality-1]->{axis}}; my @sides = grep { $_ != $axis[0] && $_ != $axis[1] } 0..5; if( $OH[$chirality-1]->{shape} eq 'Z' ) { ( $sides[2], $sides[3] ) = ( $sides[3], $sides[2] ); } if( $OH[$chirality-1]->{shape} eq '4' ) { ( $sides[0], $sides[3] ) = ( $sides[3], $sides[0] ); } # Adjust for enumeration direction @sides = reverse @sides if $OH[$chirality-1]->{order} eq '@'; # Align the axis start if( $axis[0] == $target[0] ) { # same axis start, do nothing } elsif( $axis[1] == $target[0] ) { # axis inversion @axis = reverse @axis; @sides = reverse @sides; } else { # axis start at one of the sides my $axis_index = first { $sides[$_] == $target[0] } 0..3; my @axis_now = ( $sides[$axis_index], $sides[($axis_index + 2) % 4] ); ( $sides[$axis_index], $sides[($axis_index + 2) % 4] ) = reverse @axis; @axis = @axis_now; } shift @target; # axis start is no longer needed my $axis_end = first { $target[$_] == $axis[1] } 0..4; @target = map { $target[$_] } grep { $_ != $axis_end } 0..4; # remove axis end # Cycle the sides clockwise until the first is aligned while( $sides[0] != $target[0] ) { push @sides, shift @sides; } shift @sides; shift @target; # Check the alignment of the other sides to find the shape and order my $shape; my $order; if( $target[0] == $sides[0] && $target[1] == $sides[1] ) { ( $shape, $order ) = ( 'U', '@@' ); } elsif( $target[0] == $sides[0] && $target[1] == $sides[2] ) { ( $shape, $order ) = ( 'Z', '@@' ); } elsif( $target[0] == $sides[1] && $target[1] == $sides[0] ) { ( $shape, $order ) = ( '4', '@' ); } elsif( $target[0] == $sides[1] && $target[1] == $sides[2] ) { ( $shape, $order ) = ( '4', '@@' ); } elsif( $target[0] == $sides[2] && $target[1] == $sides[0] ) { ( $shape, $order ) = ( 'Z', '@' ); } elsif( $target[0] == $sides[2] && $target[1] == $sides[1] ) { ( $shape, $order ) = ( 'U', '@' ); } else { die 'unexpected situation achieved in _octahedral_chirality()' . "\n"; } $chirality = 1 + first { $OH[$_]->{shape} eq $shape && $OH[$_]->{order} eq $order && $OH[$_]->{axis}[1] == $axis_end + 2 } 0..$#OH; return '@OH' . $chirality; } sub _order { my( $vertices ) = @_; my @sorted = sort { $vertices->{$a}{number} <=> $vertices->{$b}{number} } keys %$vertices; return $vertices->{shift @sorted}; } 1; Chemistry-OpenSMILES-0.11.6/lib/Chemistry/OpenSMILES/Stereo.pm0000644000200400020040000005062714753573665023431 0ustar andriusandriuspackage Chemistry::OpenSMILES::Stereo; # ABSTRACT: Stereochemistry handling routines our $VERSION = '0.11.6'; # VERSION use strict; use warnings; use Chemistry::OpenSMILES qw( is_chiral is_chiral_octahedral is_chiral_planar is_chiral_tetrahedral is_chiral_trigonal_bipyramidal is_cis_trans_bond is_double_bond is_ring_bond is_single_bond toggle_cistrans ); use Chemistry::OpenSMILES::Stereo::Tables qw( @OH @TB ); use Chemistry::OpenSMILES::Writer qw( write_SMILES ); use Graph::Traversal::BFS; use Graph::Undirected; use List::Util qw( all any first max min sum sum0 uniq ); use Set::Object qw( set ); require Exporter; our @ISA = qw( Exporter ); our @EXPORT_OK = qw( chirality_to_pseudograph cis_trans_to_pseudoedges mark_all_double_bonds mark_cis_trans ); sub mark_all_double_bonds { my( $graph, $setting_sub, $order_sub, $color_sub ) = @_; my @double_bonds = grep { is_double_bond( $graph, @$_ ) } $graph->edges; if( ref $setting_sub eq 'ARRAY' ) { # List of double bonds with their setting are given @double_bonds = map { [ @{$_}[1..2] ] } @$setting_sub; my %cis = map { ( join( '', sort @{$_}[1..2] ) => { atoms => set( $_->[0], $_->[3] ), setting => $_->[4] } ) } @$setting_sub; $setting_sub = sub { my $key = join '', sort @_[1..2]; return undef unless exists $cis{$key}; my $setting = $cis{$key}->{setting}; return $setting unless ($cis{$key}->{atoms} * set( $_[0], $_[3] ))->size == 1; return $setting eq 'cis' ? 'trans' : 'cis'; }; } # By default, whenever there is a choice between atoms, the one with # lowest position in the input SMILES is chosen: $order_sub = sub { $_[0]->{number} } unless $order_sub; # Select non-ring double bonds @double_bonds = grep { !is_ring_bond( $graph, @$_ ) && !is_unimportant_double_bond( $graph, @$_, $color_sub ) } @double_bonds; return unless @double_bonds; # Construct a double bond incidence graph. Vertices are double bonds # and edges are between those double bonds that separated by a single # single ('-') bond. Interestingly, incidence graph for SMILES C=C(C)=C # is connected, but for C=C=C not. This is because allenal systems # cannot be represented yet. my $bond_graph = Graph::Undirected->new; my %incident_double_bonds; for my $bond (@double_bonds) { $bond_graph->add_vertex( join '', sort @$bond ); push @{$incident_double_bonds{$bond->[0]}}, $bond; push @{$incident_double_bonds{$bond->[1]}}, $bond; } for my $bond ($graph->edges) { next unless is_single_bond( $graph, @$bond ); my @adjacent_bonds; if( $incident_double_bonds{$bond->[0]} ) { push @adjacent_bonds, @{$incident_double_bonds{$bond->[0]}}; } if( $incident_double_bonds{$bond->[1]} ) { push @adjacent_bonds, @{$incident_double_bonds{$bond->[1]}}; } for my $bond1 (@adjacent_bonds) { for my $bond2 (@adjacent_bonds) { next if $bond1 == $bond2; $bond_graph->add_edge( join( '', sort @$bond1 ), join( '', sort @$bond2 ) ); } } } # In principle, bond graph could be splitted into separate components # to reduce the number of cycles needed by Morgan algorithm, but I do # not think there is a failure case because of keeping them together. # Set up initial invariants my %invariants; for ($bond_graph->vertices) { $invariants{$_} = $bond_graph->degree( $_ ); } my %distinct_invariants = map { $_ => 1 } values %invariants; # Perform Morgan algorithm while( 1 ) { my %invariants_now; for ($bond_graph->vertices) { $invariants_now{$_} = sum0 map { $invariants{$_} } $bond_graph->neighbours( $_ ); } my %distinct_invariants_now = map { $_ => 1 } values %invariants_now; last if %distinct_invariants_now <= %distinct_invariants; %invariants = %invariants_now; %distinct_invariants = %distinct_invariants_now; } # Establish a deterministic order favouring bonds with higher invariants. # If invariants are equal, order bonds by their atom numbers. @double_bonds = sort { $invariants{join '', sort @$b} <=> $invariants{join '', sort @$a} || (min map { $order_sub->($_) } @$a) <=> (min map { $order_sub->($_) } @$b) || (max map { $order_sub->($_) } @$a) <=> (max map { $order_sub->($_) } @$b) } @double_bonds; for (@double_bonds) { mark_cis_trans( $graph, @$_, $setting_sub, $order_sub ); } } # Requires double bonds in input. Does not check whether a bond belongs # to a ring or not. sub mark_cis_trans { my( $graph, $atom2, $atom3, $setting_sub, $order_sub ) = @_; # By default, whenever there is a choice between atoms, the one with # lowest position in the input SMILES is chosen: $order_sub = sub { $_[0]->{number} } unless $order_sub; my @neighbours2 = $graph->neighbours( $atom2 ); my @neighbours3 = $graph->neighbours( $atom3 ); return if @neighbours2 < 2 || @neighbours3 < 2; # TODO: Currently we are choosing either a pair of # neighbouring atoms which have no cis/trans markers or # a pair of which a single atom has a cis/trans marker. # The latter case allows to accommodate adjacent double # bonds. However, there may be a situation where both # atoms already have cis/trans markers, but could still # be reconciled. my @cistrans_bonds2 = grep { is_cis_trans_bond( $graph, $atom2, $_ ) } @neighbours2; my @cistrans_bonds3 = grep { is_cis_trans_bond( $graph, $atom3, $_ ) } @neighbours3; if( @cistrans_bonds2 + @cistrans_bonds3 > 1 ) { warn 'cannot represent cis/trans bond between atoms ' . join( ' and ', sort { $a <=> $b } map { $_->{number} } $atom2, $atom3 ) . ' as there are other cis/trans bonds nearby' . "\n"; return; } if( (@neighbours2 == 2 && !@cistrans_bonds2 && !any { is_single_bond( $graph, $atom2, $_ ) } @neighbours2) || (@neighbours3 == 2 && !@cistrans_bonds3 && !any { is_single_bond( $graph, $atom3, $_ ) } @neighbours3) ) { # Azide group (N=N#N) or conjugated allene-like systems (=C=) warn 'atoms ' . join( ' and ', sort { $a <=> $b } map { $_->{number} } $atom2, $atom3 ) . ' are part of conjugated double/triple bond system, thus ' . 'cis/trans setting of their bond is impossible to represent ' . '(not supported yet)' . "\n"; return; } # Making the $atom2 be the one which has a defined cis/trans bond. # Also, a deterministic ordering of atoms in bond is achieved here. if( @cistrans_bonds3 || (!@cistrans_bonds2 && $order_sub->($atom2) > $order_sub->($atom3)) ) { ( $atom2, $atom3 ) = ( $atom3, $atom2 ); @neighbours2 = $graph->neighbours( $atom2 ); @neighbours3 = $graph->neighbours( $atom3 ); @cistrans_bonds2 = @cistrans_bonds3; @cistrans_bonds3 = (); } # Establishing the canonical order @neighbours2 = sort { $order_sub->($a) <=> $order_sub->($b) } grep { is_single_bond( $graph, $atom2, $_ ) } @neighbours2; @neighbours3 = sort { $order_sub->($a) <=> $order_sub->($b) } grep { is_single_bond( $graph, $atom3, $_ ) } @neighbours3; # Check if there is a chance to have anything marked my $bond_will_be_marked; for my $atom1 (@cistrans_bonds2, @neighbours2) { for my $atom4 (@neighbours3) { my $setting = $setting_sub->( $atom1, $atom2, $atom3, $atom4 ); if( $setting ) { $bond_will_be_marked = 1; last; } } } if( !$bond_will_be_marked ) { warn 'cannot represent cis/trans bond between atoms ' . join( ' and ', sort { $a <=> $b } map { $_->{number} } $atom2, $atom3 ) . ' as there are no eligible single bonds nearby' . "\n"; return; } # If there is an atom with cis/trans bond, then this is this one. # Adjustment to pre-order (neither the requested order, nor the post-order!) is needed to maintain relative settings in order. # Otherwise nondeterminism may occur and result in different (albeit isomorphic) output SMILES like: # C/C=C\CCCCC/C=C\C # C/C=C\CCCCC\C=C/C my( $first_atom ) = @cistrans_bonds2 ? @cistrans_bonds2 : @neighbours2; if( !@cistrans_bonds2 ) { $graph->set_edge_attribute( $first_atom, $atom2, 'bond', $first_atom->{number} < $atom2->{number} ? '/' : '\\' ); } # Adjustments to pre-order (neither the requested order, nor the post-order!) are done here. my $atom4_marked; for my $atom4 (@neighbours3) { my $atom1 = $first_atom; my $setting = $setting_sub->( $atom1, $atom2, $atom3, $atom4 ); next unless $setting; my $other = $graph->get_edge_attribute( $atom1, $atom2, 'bond' ); $other = toggle_cistrans $other if $setting eq 'cis'; $other = toggle_cistrans $other if $atom1->{number} > $atom2->{number}; $other = toggle_cistrans $other if $atom3->{number} > $atom4->{number}; $graph->set_edge_attribute( $atom3, $atom4, 'bond', $other ); $atom4_marked = $atom4 unless $atom4_marked; } for my $atom1 (@neighbours2) { next if $atom1 eq $first_atom; # Marked already my $atom4 = $atom4_marked; my $setting = $setting_sub->( $atom1, $atom2, $atom3, $atom4 ); next unless $setting; my $other = $graph->get_edge_attribute( $atom3, $atom4, 'bond' ); $other = toggle_cistrans $other if $setting eq 'cis'; $other = toggle_cistrans $other if $atom1->{number} > $atom2->{number}; $other = toggle_cistrans $other if $atom3->{number} > $atom4->{number}; $graph->set_edge_attribute( $atom1, $atom2, 'bond', $other ); } } # Store chirality character as additional pseudo vertices and edges. sub chirality_to_pseudograph { my( $moiety ) = @_; for my $atom ($moiety->vertices) { next unless is_chiral $atom; next unless exists $atom->{chirality_neighbours}; my @chirality_neighbours = @{$atom->{chirality_neighbours}}; my $has_lone_pair; if( is_chiral_tetrahedral( $atom ) || is_chiral_planar( $atom ) ) { next unless @chirality_neighbours >= 3 && @chirality_neighbours <= 4; $has_lone_pair = @chirality_neighbours == 3; } elsif( is_chiral_trigonal_bipyramidal( $atom ) ) { next unless @chirality_neighbours >= 4 && @chirality_neighbours <= 5; $has_lone_pair = @chirality_neighbours == 4; } elsif( is_chiral_octahedral( $atom ) ) { next unless @chirality_neighbours >= 5 && @chirality_neighbours <= 6; $has_lone_pair = @chirality_neighbours == 5; } if( $has_lone_pair ) { @chirality_neighbours = ( $chirality_neighbours[0], {}, # marking the lone pair @chirality_neighbours[1..$#chirality_neighbours] ); } if( is_chiral_tetrahedral( $atom ) ) { # Algorithm is described in detail in doi:10.1186/s13321-023-00692-1 if( $atom->{chirality} eq '@' ) { # Reverse the order if counter-clockwise @chirality_neighbours = ( $chirality_neighbours[0], reverse @chirality_neighbours[1..3] ); } for my $i (0..3) { my $neighbour = $chirality_neighbours[$i]; my @chirality_neighbours_now = @chirality_neighbours; if( $i % 2 ) { # Reverse the order due to projected atom change @chirality_neighbours_now = ( $chirality_neighbours_now[0], reverse @chirality_neighbours_now[1..3] ); } my @other = grep { $_ != $neighbour } @chirality_neighbours_now; for my $offset (0..2) { my $connector = {}; $moiety->set_edge_attribute( $neighbour, $connector, 'chiral', 'from' ); $moiety->set_edge_attribute( $atom, $connector, 'chiral', 'to' ); $moiety->set_edge_attribute( $connector, $other[0], 'chiral', 1 ); $moiety->set_edge_attribute( $connector, $other[1], 'chiral', 2 ); $moiety->set_edge_attribute( $connector, $other[2], 'chiral', 3 ); push @other, shift @other; } } } elsif( is_chiral_planar( $atom ) ) { # For square planar environments it is enough to retain the enumeration order of atoms. # To do so, "neighbouring neighbours" are connected together and a link to central atom is placed. if( $atom->{chirality} eq '@SP2' ) { # 4 @chirality_neighbours = map { $chirality_neighbours[$_] } ( 0, 2, 1, 3 ); } elsif( $atom->{chirality} eq '@SP3' ) { # Z @chirality_neighbours = map { $chirality_neighbours[$_] } ( 0, 1, 3, 2 ); } for my $i (0..3) { my $connector = {}; $moiety->set_edge_attribute( $atom, $connector, 'chiral', 'center' ); $moiety->set_edge_attribute( $connector, $chirality_neighbours[$i], 'chiral', 'neighbour' ); $moiety->set_edge_attribute( $connector, $chirality_neighbours[($i + 1) % 4], 'chiral', 'neighbour' ); } } elsif( is_chiral_trigonal_bipyramidal( $atom ) ) { my $number = substr $atom->{chirality}, 3; my $setting = $TB[$number - 1]; my @axis = map { $chirality_neighbours[$_ - 1] } @{$setting->{axis}}; my @other = grep { $_ != $axis[0] && $_ != $axis[1] } map { $chirality_neighbours[$_] } 0..4; @other = reverse @other if $setting->{order} eq '@@'; for my $from (@axis) { my $to = first { $_ != $from } @axis; for (0..2) { my $connector = {}; $moiety->set_edge_attribute( $from, $connector, 'chiral', 'from' ); $moiety->set_edge_attribute( $atom, $connector, 'chiral', 'center' ); $moiety->set_edge_attribute( $to, $connector, 'chiral', 'to' ); $moiety->set_edge_attribute( $connector, $other[-1], 'chiral', 'counter-clockwise' ); $moiety->set_edge_attribute( $connector, $other[ 1], 'chiral', 'clockwise' ); push @other, shift @other; } @other = reverse @other; # Inverting the axis } } else { # Chiral octahedral my $chirality = int substr $atom->{chirality}, 3; my @axis = map { $chirality_neighbours[$_-1] } @{$OH[$chirality-1]->{axis}}; my @sides = grep { $_ != $axis[0] && $_ != $axis[1] } @chirality_neighbours; if( $OH[$chirality-1]->{shape} eq 'Z' ) { ( $sides[2], $sides[3] ) = ( $sides[3], $sides[2] ); } if( $OH[$chirality-1]->{shape} eq '4' ) { ( $sides[0], $sides[3] ) = ( $sides[3], $sides[0] ); } @chirality_neighbours = ( $axis[0], @sides, $axis[1] ); for my $side (( [ [ 0, 5 ], [ 1, 2, 3, 4 ] ], [ [ 1, 3 ], [ 0, 4, 5, 2 ] ], [ [ 2, 4 ], [ 0, 1, 5, 3 ] ] )) { my @axis = map { $chirality_neighbours[$_] } @{$side->[0]}; my @other = map { $chirality_neighbours[$_] } @{$side->[1]}; for my $from (@axis) { my $to = first { $_ != $from } @axis; for (0..3) { my $connector = {}; $moiety->set_edge_attribute( $from, $connector, 'chiral', 'from' ); $moiety->set_edge_attribute( $atom, $connector, 'chiral', 'center' ); $moiety->set_edge_attribute( $to, $connector, 'chiral', 'to' ); $moiety->set_edge_attribute( $connector, $other[-1], 'chiral', 'counter-clockwise' ); $moiety->set_edge_attribute( $connector, $other[ 1], 'chiral', 'clockwise' ); push @other, shift @other; } @other = reverse @other; # Inverting the axis } } } } } sub cis_trans_to_pseudoedges { my( $moiety ) = @_; # Select non-ring double bonds my @double_bonds = grep { is_double_bond( $moiety, @$_ ) && !is_ring_bond( $moiety, @$_ ) && !is_unimportant_double_bond( $moiety, @$_ ) } $moiety->edges; # Connect cis/trans atoms in double bonds with pseudo-edges for my $bond (@double_bonds) { my( $atom2, $atom3 ) = @$bond; my @atom2_neighbours = grep { !is_pseudoedge( $moiety, $atom2, $_ ) } $moiety->neighbours( $atom2 ); my @atom3_neighbours = grep { !is_pseudoedge( $moiety, $atom3, $_ ) } $moiety->neighbours( $atom3 ); next if @atom2_neighbours < 2 || @atom2_neighbours > 3 || @atom3_neighbours < 2 || @atom3_neighbours > 3; my $atom1 = first { is_cis_trans_bond( $moiety, $atom2, $_ ) } @atom2_neighbours; my $atom4 = first { is_cis_trans_bond( $moiety, $atom3, $_ ) } @atom3_neighbours; next unless $atom1 && $atom4; my $atom1_para = first { $_ != $atom1 && $_ != $atom3 } @atom2_neighbours; my $atom4_para = first { $_ != $atom4 && $_ != $atom2 } @atom3_neighbours; my $is_cis = $moiety->get_edge_attribute( $atom1, $atom2, 'bond' ) ne $moiety->get_edge_attribute( $atom3, $atom4, 'bond' ); # Here atom numbers have to be compared to differentiate between cases like: # C/C=C\C and C(\C)=C/C $is_cis = !$is_cis if $atom1->{number} > $atom2->{number}; $is_cis = !$is_cis if $atom3->{number} > $atom4->{number}; $moiety->set_edge_attribute( $atom1, $atom4, 'pseudo', $is_cis ? 'cis' : 'trans' ); if( $atom1_para ) { $moiety->set_edge_attribute( $atom1_para, $atom4, 'pseudo', $is_cis ? 'trans' : 'cis' ); } if( $atom4_para ) { $moiety->set_edge_attribute( $atom1, $atom4_para, 'pseudo', $is_cis ? 'trans' : 'cis' ); } if( $atom1_para && $atom4_para ) { $moiety->set_edge_attribute( $atom1_para, $atom4_para, 'pseudo', $is_cis ? 'cis' : 'trans' ); } } # Unset cis/trans bond markers during second pass for my $bond ($moiety->edges) { next unless is_cis_trans_bond( $moiety, @$bond ); $moiety->delete_edge_attribute( @$bond, 'bond' ); } } sub is_pseudoedge { my( $moiety, $a, $b ) = @_; return $moiety->has_edge_attribute( $a, $b, 'pseudo' ); } # An "unimportant" double bond is one which has chemically identical atoms on one of its sides. # If C<$color_sub> is given, it is used to determine chemical identity of atoms. # If not, only leaf atoms are considered and compared. sub is_unimportant_double_bond { my( $moiety, $a, $b, $color_sub ) = @_; my @a_neighbours = grep { $_ != $b } $moiety->neighbours( $a ); my @b_neighbours = grep { $_ != $a } $moiety->neighbours( $b ); for (\@a_neighbours, \@b_neighbours) { next unless @$_ == 2; my @representations; if( $color_sub ) { @representations = map { $color_sub->( $_ ) } @$_; } else { next if any { $moiety->degree( $_ ) != 1 } @$_; @representations = map { write_SMILES( $_ ) } @$_; } return 1 if uniq( @representations ) == 1; } return; } 1; Chemistry-OpenSMILES-0.11.6/META.json0000644000200400020040000000331314753573665016706 0ustar andriusandrius{ "abstract" : "OpenSMILES format reader and writer", "author" : [ "Andrius Merkys " ], "dynamic_config" : 0, "generated_by" : "Dist::Zilla version 6.012, CPAN::Meta::Converter version 2.150010", "license" : [ "bsd" ], "meta-spec" : { "url" : "http://search.cpan.org/perldoc?CPAN::Meta::Spec", "version" : 2 }, "name" : "Chemistry-OpenSMILES", "prereqs" : { "build" : { "requires" : { "Module::Build" : "0.28", "Module::Build::Parse::Yapp" : "v0.1.2" } }, "configure" : { "requires" : { "Module::Build" : "0.28", "Module::Build::Parse::Yapp" : "v0.1.2" } }, "runtime" : { "requires" : { "Chemistry::Elements" : "0", "Graph" : "0.97", "List::Util" : "1.45", "Parse::Yapp" : "0", "Set::Object" : "0", "perl" : "5.010000" } }, "test" : { "requires" : { "Algorithm::Combinatorics" : "0", "Data::Dumper" : "0", "Test::More" : "0" } } }, "release_status" : "stable", "resources" : { "bugtracker" : { "web" : "https://github.com/merkys/chemistry-opensmiles/issues" }, "homepage" : "https://search.cpan.org/dist/Chemistry-OpenSMILES", "repository" : { "type" : "git", "url" : "git://github.com/merkys/chemistry-opensmiles.git", "web" : "https://github.com/merkys/chemistry-opensmiles" } }, "version" : "0.11.6", "x_generated_by_perl" : "v5.30.0", "x_serialization_backend" : "Cpanel::JSON::XS version 4.19" }