Chemistry-OpenSMILES-0.9.0/0000775000200400020040000000000014516211770015167 5ustar andriusandriusChemistry-OpenSMILES-0.9.0/dist.ini0000644000200400020040000000134114516211770016630 0ustar andriusandriusname = Chemistry-OpenSMILES author = Andrius Merkys license = BSD copyright_holder = Andrius Merkys copyright_year = 2020-2023 version = 0.9.0 [@Filter] -bundle = @Basic -remove = MakeMaker -remove = License [AutoMetaResources] homepage = https://search.cpan.org/dist/%{dist} repository.github = user:merkys bugtracker.github = user:merkys [MetaJSON] [MinimumPerlFast] [ModuleBuild] mb_class = Module::Build::Parse::Yapp build_element = yp [OurPkgVersion] [Prereqs] Graph = 0.97 List::Util = 1.45 Parse::Yapp = 0 [Prereqs / Build] -phase = build Module::Build::Parse::Yapp = 0.1.2 [Prereqs / Configure] -phase = configure Module::Build::Parse::Yapp = 0.1.2 [Prereqs / Test] -phase = test Test::More = 0 Chemistry-OpenSMILES-0.9.0/LICENSE0000644000200400020040000000273314516211770016177 0ustar andriusandriusCopyright (c) The Regents of the University of California. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the University nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. Chemistry-OpenSMILES-0.9.0/Changes0000644000200400020040000001264714516211770016472 0ustar andriusandrius0.9.0 2023-10-25 - Handling square planar chirality. 0.8.6 2023-06-19 - Implementing is_ring_atom(). - No longer removing chiral centers from atoms in rings. - Accepting coloring subroutine references in is_unimportant_double_bond() and mark_all_double_bonds(). - Returning false instead of undefined value. - Exporting %bond_order_to_symbol and %bond_symbol_to_order to simplify conversion between bond symbols and orders. 0.8.5 2023-01-26 - Fixing incorrect chirality handling code as it did not consider all ring bonds up to now. 0.8.4 2022-11-29 - Ceasing to mark unimportant double bonds in Chemistry::OpenSMILES::Stereo. 0.8.3 2022-11-03 - Exporting %Chemistry::OpenSMILES::normal_valence. 0.8.2 2022-09-20 - Adding Chemistry::OpenSMILES::Aromaticity and Chemistry::OpenSMILES::Stereo from smiles-scripts project v0.2.0. - Fixing prototypes of recently added functions. - Introducing Chemistry::OpenSMILES::is_double_bond(). - Introducing Chemistry::OpenSMILES::is_ring_bond(). 0.8.1 2022-05-11 - Fixing a bug with incomplete cis/trans markers on ring bonds. - Introducing Chemistry::OpenSMILES::toggle_cistrans(). - Introducing two functions for bond type determination. 0.8.0 2022-05-05 - Implementing support for tetrahedral chiral centers having lone pairs. - Safeguarding against infinite cycles in Chemistry::OpenSMILES::Writer::_permutation_order(). - Fixing mistake in Chemistry::OpenSMILES::Parser::parse() documentation. 0.7.0 2021-10-30 - Fixing important issue with representing tetrahedral chiral centers by introducing 'chirality_neighbours' key to atom hashes. 0.6.0 2021-10-18 - Allowing multi-digit hydrogen counts if enabled via parser option. - Standardizing chiral markers for allenal carbons. - Not standardizing chiral markers in raw output. - Reporting '@' and '@@' chiralities with other than 4 neighbours as they cannot be processed right now. 0.5.2 2021-10-11 - Creating explicitly refvertexed Graph objects to get around all the problems caused by interpreting them as scalar values. 0.5.1 2021-08-29 - Working around an incompatibility with Graph v0.9721. 0.5.0 2021-08-26 - Handling raw parsing output in Chemistry::OpenSMILES::clean_chiral_centers(). - Fixing a bug in position counting due to $1 pollution. - Detecting and reporting disconnected parts of moieties. - Introducing Chemistry::OpenSMILES::is_chiral() to detect both chiral atoms and moieties. - Fixing bug in Chemistry::OpenSMILES::clean_chiral_centers() which caused removal of all chiral centers, not just tetrahedral. - Introducing Chemistry::OpenSMILES::mirror() to invert chiral centers, currently only tetrahedral. - Fixing bug in printing of chiral centers, previously only tetrahedral centers were printed. 0.4.6 2021-04-14 - Fixing reading/writing of cis/trans ring bonds. 0.4.5 2021-03-25 - Fixing a bug in aromatic bond detection. 0.4.4 2021-03-09 - Introducing Chemistry::OpenSMILES::clean_chiral_centers() to remove unnecessary tetrahedral chiral settings. 0.4.3 2021-01-25 - Supporting tetrahedral chirality in Chemistry::OpenSMILES::Writer::write_SMILES(). - Chiralities @TH1 and @TH2 are the same as @ and @@, respectively. - Specifying the requirement of Graph v0.97 or later. - Fixing issue with homepage. 0.4.2 2021-01-20 - Exporting Chemistry::OpenSMILES::Writer::write_SMILES(). - Removing chirality information from written SMILES, as code to write this reliably is not yet available. - Fixing writing of cis/trans bonds. 0.4.1 2021-01-12 - Fixing an issue with 'scalar %hash' in t/01_atoms.t. - Listing a dependency on Graph::Traversal::DFS. 0.4.0 2021-01-11 - Adding Chemistry::OpenSMILES::Writer, implementing a SMILES writer for the data structures built by Chemistry::OpenSMILES::Parser. - Converting 'charge' atom field to integer. - Exporting Chemistry::OpenSMILES::is_aromatic(). 0.3.2 2020-10-05 - Explicitly setting bond order to ':' on bonds between aromatic atoms. 0.3.1 2020-09-17 - Establishing deterministic order while generating graph vertices for implicit hydrogen atoms. 0.3.0 2020-09-16 - Deriving counts of implicit hydrogen atoms. - Default number of attached hydrogen atoms for atoms in square brackets is 0. - Supporting bracketless '*' atom. 0.2.1 2020-08-13 - Adding 'number' atom field to store atom positions in the initial SMILES string. - Making 'class' atom field mandatory. - Making 'isotope' atom field an integer. 0.2.0 2020-05-20 - Fixing an issue with graph merging. - Unifying the representation of single bonds. - Extending POD documentation. 0.1.3 2020-05-19 - Detecting and reporting unbalanced parentheses. - Detecting and reporting unclosed ring bonds. - Fixing position pointers in error messages, removing line numbers. - Adding versioned dependency on Module::Build::Parse::Yapp v0.1.2. - Declaring minimum Perl version. - Fixing the POD. 0.1.2 2020-05-17 - Turning hydrogen counts into real vertices. If 'raw' option is present, hydrogen counts are left as integers instead. 0.1.1 2020-05-14 - Adding missing dependency on Graph::Undirected. - Adding POD. 0.1.0 2020-05-13 - Initial release. Chemistry-OpenSMILES-0.9.0/README0000644000200400020040000000050214516211770016042 0ustar andriusandriusThis archive contains the distribution Chemistry-OpenSMILES, version 0.9.0: OpenSMILES format reader and writer This software is Copyright (c) 2020-2023 by Andrius Merkys. This is free software, licensed under: The (three-clause) BSD License This README file was generated by Dist::Zilla::Plugin::Readme v6.012. Chemistry-OpenSMILES-0.9.0/Build.PL0000644000200400020040000000246114516211770016464 0ustar andriusandrius # This file was automatically generated by Dist::Zilla::Plugin::ModuleBuild v6.012. use strict; use warnings; use Module::Build 0.28; use lib qw{inc}; use Module::Build::Parse::Yapp; my %module_build_args = ( "build_requires" => { "Module::Build" => "0.28", "Module::Build::Parse::Yapp" => "v0.1.2" }, "configure_requires" => { "Module::Build" => "0.28", "Module::Build::Parse::Yapp" => "v0.1.2" }, "dist_abstract" => "OpenSMILES format reader and writer", "dist_author" => [ "Andrius Merkys " ], "dist_name" => "Chemistry-OpenSMILES", "dist_version" => "0.9.0", "license" => "bsd", "module_name" => "Chemistry::OpenSMILES", "recursive_test_files" => 1, "requires" => { "Graph" => "0.97", "List::Util" => "1.45", "Parse::Yapp" => 0, "perl" => "5.010000" }, "test_requires" => { "Test::More" => 0 } ); my %fallback_build_requires = ( "Module::Build" => "0.28", "Module::Build::Parse::Yapp" => "v0.1.2", "Test::More" => 0 ); unless ( eval { Module::Build->VERSION(0.4004) } ) { delete $module_build_args{test_requires}; $module_build_args{build_requires} = \%fallback_build_requires; } my $build = Module::Build::Parse::Yapp->new(%module_build_args); $build->add_build_element($_) for qw(yp); $build->create_build_script; Chemistry-OpenSMILES-0.9.0/MANIFEST0000644000200400020040000000147114516211770016321 0ustar andriusandrius# This file was automatically generated by Dist::Zilla::Plugin::Manifest v6.012. Build.PL Changes LICENSE MANIFEST META.json META.yml README dist.ini lib/Chemistry/OpenSMILES.pm lib/Chemistry/OpenSMILES/Aromaticity.pm lib/Chemistry/OpenSMILES/Parser.yp lib/Chemistry/OpenSMILES/Stereo.pm lib/Chemistry/OpenSMILES/Writer.pm t/01_atoms.t t/02_chains.t t/03_hcount.t t/04_errors.t t/05_orders.t t/06_write.t t/07_cistrans.t t/08_permutation_order.t t/09_tetrahedral_chirality.t t/10_validate.t t/11_validate_color.t t/12_tetrahedral_chirality.t t/13_clean_chiral_centers.t t/14_write_disconnected.t t/15_chirality.t t/16_unsupported_chirality.t t/17_max_hydrogen_count_digits.t t/18_chirality_reference.t t/19_lone_pairs.t t/20_aromaticity.t t/21_stereo.t t/22_rings.t t/23_clean_chiral_centers.t t/24_square_planar_chirality.t Chemistry-OpenSMILES-0.9.0/META.yml0000644000200400020040000000157214516211770016443 0ustar andriusandrius--- abstract: 'OpenSMILES format reader and writer' author: - 'Andrius Merkys ' build_requires: Module::Build: '0.28' Module::Build::Parse::Yapp: v0.1.2 Test::More: '0' configure_requires: Module::Build: '0.28' Module::Build::Parse::Yapp: v0.1.2 dynamic_config: 0 generated_by: 'Dist::Zilla version 6.012, CPAN::Meta::Converter version 2.150010' license: bsd meta-spec: url: http://module-build.sourceforge.net/META-spec-v1.4.html version: '1.4' name: Chemistry-OpenSMILES requires: Graph: '0.97' List::Util: '1.45' Parse::Yapp: '0' perl: '5.010000' resources: bugtracker: https://github.com/merkys/chemistry-opensmiles/issues homepage: https://search.cpan.org/dist/Chemistry-OpenSMILES repository: git://github.com/merkys/chemistry-opensmiles.git version: 0.9.0 x_generated_by_perl: v5.30.0 x_serialization_backend: 'YAML::Tiny version 1.73' Chemistry-OpenSMILES-0.9.0/t/0000775000200400020040000000000014516211770015432 5ustar andriusandriusChemistry-OpenSMILES-0.9.0/t/08_permutation_order.t0000644000200400020040000000152714516211770021673 0ustar andriusandrius#!/usr/bin/perl use strict; use warnings; use Chemistry::OpenSMILES::Writer; use Test::More; sub order { return join '', Chemistry::OpenSMILES::Writer::_permutation_order( @_ ); } my $random_cases = 20; my @bad_cases = ( [ 0..2 ], [ 0..4 ], [ 1..4 ], [ 0..2, undef ], [ 0, 1, 12, '' ], ); plan tests => $random_cases + (2 * scalar @bad_cases); for (1..$random_cases) { my @order = 0..3; for (0..9) { if( rand() < 0.5 ) { @order = ( @order[1..2], $order[0], $order[3] ); } else { @order = ( $order[0], @order[2..3], $order[1] ); } } is( order( @order ), '0123' ); } for (@bad_cases) { my $warning; local $SIG{__WARN__} = sub { $warning = $_[0] }; is( order( @$_ ), '0123' ); ok( defined $warning && $warning =~ /unexpected input received/ ); } Chemistry-OpenSMILES-0.9.0/t/15_chirality.t0000644000200400020040000000163114516211770020113 0ustar andriusandrius#!/usr/bin/perl use strict; use warnings; use Chemistry::OpenSMILES qw(is_chiral mirror); use Chemistry::OpenSMILES::Parser; use Chemistry::OpenSMILES::Writer qw(write_SMILES); use Test::More; my @cases = ( [ 'N[C@](Br)(O)C', 1, 1, 'N([C@@](Br)(O)(C))' ], [ 'N[C@@](Br)(O)C', 1, 1, 'N([C@](Br)(O)(C))' ], [ 'N(C)(Br)(O)C', 0, 0, 'N(C)(Br)(O)(C)' ], [ 'N(C)(Br)(O)C', 0, 0, 'N(C)(Br)(O)(C)' ], [ 'N[C@AL1](Br)(O)C', 1, 0, 'N(C(Br)(O)(C))' ], ); plan tests => 3 * scalar @cases; for my $case (@cases) { my $parser; my $moiety; my $result; $parser = Chemistry::OpenSMILES::Parser->new; ( $moiety ) = $parser->parse( $case->[0], { raw => 1 } ); is( is_chiral( $moiety ) + 0, $case->[1] ); is( Chemistry::OpenSMILES::is_chiral_tetrahedral( $moiety ) + 0, $case->[2] ); mirror( $moiety ); is( write_SMILES( $moiety ), $case->[3] ); } Chemistry-OpenSMILES-0.9.0/t/09_tetrahedral_chirality.t0000644000200400020040000000277514516211770022507 0ustar andriusandrius#!/usr/bin/perl use strict; use warnings; use Chemistry::OpenSMILES::Parser; use Chemistry::OpenSMILES::Writer qw(write_SMILES); use Test::More; my @cases = ( [ 'N[C@](Br)(O)C', 'N([C@](Br)(O)(C))', 'C([C@](O)(Br)(N))' ], [ 'Br[C@](O)(N)C', 'Br([C@](O)(N)(C))', 'C([C@](N)(O)(Br))' ], [ 'O[C@](Br)(C)N', 'O([C@](Br)(C)(N))', 'N([C@](C)(Br)(O))' ], [ 'Br[C@](C)(O)N', 'Br([C@](C)(O)(N))', 'N([C@](O)(C)(Br))' ], [ 'C[C@](Br)(N)O', 'C([C@](Br)(N)(O))', 'O([C@](N)(Br)(C))' ], [ 'Br[C@](N)(C)O', 'Br([C@](N)(C)(O))', 'O([C@](C)(N)(Br))' ], [ 'C[C@@](Br)(O)N', 'C([C@@](Br)(O)(N))', 'N([C@@](O)(Br)(C))' ], [ 'Br[C@@](N)(O)C', 'Br([C@@](N)(O)(C))', 'C([C@@](O)(N)(Br))' ], [ '[C@@](C)(Br)(O)N', '[C@@](C)(Br)(O)(N)', 'N([C@@](O)(Br)(C))' ], [ '[C@@](Br)(N)(O)C', '[C@@](Br)(N)(O)(C)', 'C([C@@](O)(N)(Br))' ], [ 'C1OCC[C@]1(Cl)Br', 'C1(O(C(C([C@]1(Cl)(Br)))))', 'Br([C@@]1(Cl)(C(C(O(C1)))))' ], ); plan tests => 2 * scalar @cases; for my $case (@cases) { my $parser; my @moieties; my $result; $parser = Chemistry::OpenSMILES::Parser->new; @moieties = $parser->parse( $case->[0], { raw => 1 } ); $result = write_SMILES( \@moieties ); is( $result, $case->[1] ); $result = write_SMILES( \@moieties, \&reverse_order ); is( $result, $case->[2] ); } sub reverse_order { my( $vertices ) = @_; my @sorted = sort { $vertices->{$b}{number} <=> $vertices->{$a}{number} } keys %$vertices; return $vertices->{shift @sorted}; } Chemistry-OpenSMILES-0.9.0/t/17_max_hydrogen_count_digits.t0000644000200400020040000000056214516211770023366 0ustar andriusandrius#!/usr/bin/perl use strict; use warnings; use Chemistry::OpenSMILES::Parser; use Test::More; my %cases = ( '[YbH12+3]' => 13, ); plan tests => scalar keys %cases; for (sort keys %cases) { my $parser = Chemistry::OpenSMILES::Parser->new; my( $graph ) = $parser->parse( $_, { max_hydrogen_count_digits => 2 } ); is( $graph->vertices, $cases{$_} ); } Chemistry-OpenSMILES-0.9.0/t/21_stereo.t0000644000200400020040000000422514516211770017423 0ustar andriusandrius#!/usr/bin/perl use strict; use warnings; use Chemistry::OpenSMILES qw( is_cis_trans_bond ); use Chemistry::OpenSMILES::Parser; use Chemistry::OpenSMILES::Stereo qw( chirality_to_pseudograph cis_trans_to_pseudoedges mark_all_double_bonds ); use Test::More; my @cases = ( [ 'C/C=C/C', 12, 15, 12, 11, 4 ], [ 'F/N=C(/F)\F', 5, 4, 5, 4, 0 ], ); plan tests => 5 * @cases + 2; my $parser = Chemistry::OpenSMILES::Parser->new; my( $moiety ) = $parser->parse( 'N[C@](Br)(O)C' ); chirality_to_pseudograph( $moiety ); is( $moiety->vertices, 23 ); is( $moiety->edges, 70 ); for my $case (@cases) { my( $smiles, $vertices_with_pseudo, $edges_with_pseudo, $vertices_when_marked, $edges_when_marked, $cis_trans_bonds ) = @$case; my( $moiety ) = $parser->parse( $smiles ); # copy() makes a shallow copy without edge attributes, thus they # have to be added later: my $copy = $moiety->copy; for my $bond ($moiety->edges) { next unless $moiety->has_edge_attribute( @$bond, 'bond' ); $copy->set_edge_attribute( @$bond, 'bond', $moiety->get_edge_attribute( @$bond, 'bond' ) ); } cis_trans_to_pseudoedges( $copy ); is( $copy->vertices, $vertices_with_pseudo ); is( $copy->edges, $edges_with_pseudo ); # Drop cis/trans markers from the input graph and mark them # anew. for my $bond ($moiety->edges) { next unless is_cis_trans_bond( $moiety, @$bond ); $moiety->delete_edge_attribute( @$bond, 'bond' ); } mark_all_double_bonds( $moiety, sub { if( $copy->has_edge( $_[0], $_[3] ) && $copy->has_edge_attribute( $_[0], $_[3], 'pseudo' ) ) { return $copy->get_edge_attribute( $_[0], $_[3], 'pseudo' ); } } ); is( $moiety->vertices, $vertices_when_marked ); is( $moiety->edges, $edges_when_marked ); is( scalar( grep { is_cis_trans_bond( $moiety, @$_ ) } $moiety->edges ), $cis_trans_bonds ); } Chemistry-OpenSMILES-0.9.0/t/05_orders.t0000644000200400020040000000215014516211770017415 0ustar andriusandrius#!/usr/bin/perl use strict; use warnings; use Chemistry::OpenSMILES::Parser; use Test::More; my %cases = ( 'CCC' => {}, 'C-C' => {}, 'C=C' => { '=' => 1 }, 'C-1C1' => {}, 'C=1C1' => { '=' => 1 }, 'C(=O)' => { '=' => 1 }, 'C(C=C)' => { '=' => 1 }, 'cc' => { ':' => 1 }, 'c:c' => { ':' => 1 }, 'c(c)' => { ':' => 1 }, 'c1ccc1' => { ':' => 4 }, 'c-c' => {}, 'c(-c)' => {}, 'c1cc-1' => { ':' => 2 }, 'c-1cc1' => { ':' => 2 }, ); plan tests => 2 * scalar keys %cases; for my $case (sort keys %cases) { my $parser = Chemistry::OpenSMILES::Parser->new; my @graphs = $parser->parse( $case ); is( scalar @graphs, 1 ); my $graph = shift @graphs; my %orders; for ($graph->edges) { next if !$graph->has_edge_attribute( @$_, 'bond' ); $orders{ $graph->get_edge_attribute( @$_, 'bond' )} ++; } is( serialize( \%orders ), serialize( $cases{$case} ) ); } sub serialize { my( $orders ) = @_; return join ' ', map { $_ . '(' . $orders->{$_} . ')' } sort keys %$orders; } Chemistry-OpenSMILES-0.9.0/t/13_clean_chiral_centers.t0000644000200400020040000000113414516211770022246 0ustar andriusandrius#!/usr/bin/perl use strict; use warnings; use Chemistry::OpenSMILES qw(clean_chiral_centers); use Chemistry::OpenSMILES::Parser; use Chemistry::OpenSMILES::Writer qw(write_SMILES); use Test::More; my %cases = ( 'C[C@H2]O' => 1, 'C[C@H]([H])O' => 1, 'C[C@H]([S])O' => 0, ); plan tests => 2 * scalar keys %cases; for my $case (sort keys %cases) { my $parser = Chemistry::OpenSMILES::Parser->new; my @moieties = $parser->parse( $case, { raw => 1 } ); is( scalar @moieties, 1 ); is( scalar clean_chiral_centers( $moieties[0], \&write_SMILES ), $cases{$case} ); } Chemistry-OpenSMILES-0.9.0/t/04_errors.t0000644000200400020040000000254314516211770017440 0ustar andriusandrius#!/usr/bin/perl use strict; use warnings; use Chemistry::OpenSMILES::Parser; use Test::More; my %cases = ( 'C(CC' => 't/04_errors.t: syntax error: missing closing parenthesis.', 'CC)C' => 't/04_errors.t: syntax error: unbalanced parentheses.', 'CCC)' => 't/04_errors.t: syntax error: unbalanced parentheses.', 'C#=O' => 't/04_errors.t: syntax error at position 3: \'O\'.', 'C..O' => 't/04_errors.t: syntax error at position 3: \'O\'.', 'CCC1' => 't/04_errors.t: unclosed ring bond(s) detected: 1.', 'C2%12' => 't/04_errors.t: unclosed ring bond(s) detected: 2, 12.', 'C=1CC$1' => 't/04_errors.t: ring bond types for ring bond 1 do not match.', 'C((C))O' => 't/04_errors.t: syntax error at position 3: \'C))O\'.', '(O)' => 't/04_errors.t: syntax error at position 1: \'O)\'.', '(O)C' => 't/04_errors.t: syntax error at position 1: \'O)C\'.', '.CC' => 't/04_errors.t: syntax error at position 1: \'CC\'.', 'CC.' => 't/04_errors.t: syntax error at position 4.', ); plan tests => scalar keys %cases; for (sort keys %cases) { 'intentionally polluting $1' =~ /(\S+)/; my $error; eval { my $parser = Chemistry::OpenSMILES::Parser->new; my @graphs = $parser->parse( $_ ); }; $error = $@ if $@; $error =~ s/\n$// if $error; is( $error, $cases{$_} ); } Chemistry-OpenSMILES-0.9.0/t/22_rings.t0000644000200400020040000000152414516211770017244 0ustar andriusandrius#!/usr/bin/perl use strict; use warnings; use Chemistry::OpenSMILES qw( is_ring_atom is_ring_bond ); use Chemistry::OpenSMILES::Parser; use Test::More; my %cases = ( 'CN1C=NC2=C1C(=O)N(C(=O)N2C)C' => { atoms => 9, bonds => 10 }, ); plan tests => 4 * scalar keys %cases; for my $case (sort keys %cases) { my $parser = Chemistry::OpenSMILES::Parser->new; my( $molecule ) = $parser->parse( $case ); is scalar( grep { is_ring_atom( $molecule, $_ ) } $molecule->vertices ), $cases{$case}->{atoms}; is scalar( grep { is_ring_bond( $molecule, @$_ ) } $molecule->edges ), $cases{$case}->{bonds}; is scalar( grep { is_ring_atom( $molecule, $_, -1 ) } $molecule->vertices ), $cases{$case}->{atoms}; is scalar( grep { is_ring_bond( $molecule, @$_, -1 ) } $molecule->edges ), $cases{$case}->{bonds}; } Chemistry-OpenSMILES-0.9.0/t/20_aromaticity.t0000644000200400020040000000137314516211770020447 0ustar andriusandrius#!/usr/bin/perl use strict; use warnings; use Chemistry::OpenSMILES::Parser; use Chemistry::OpenSMILES::Aromaticity qw( aromatise kekulise ); use Chemistry::OpenSMILES::Writer qw( write_SMILES ); use Test::More; my @cases = ( [ 'C1=CC=CC=C1', 'c:1(:c(:c(:c(:c(:c:1([H]))([H]))([H]))([H]))([H]))([H])', 'C=1(C(=C(C(=C(C=1([H]))([H]))([H]))([H]))([H]))([H])' ], ); plan tests => 2 * scalar @cases; for my $case (@cases) { my $result; my $parser = Chemistry::OpenSMILES::Parser->new; my( $moiety ) = $parser->parse( $case->[0] ); aromatise( $moiety ); $result = write_SMILES( [ $moiety ] ); is( $result, $case->[1] ); kekulise( $moiety ); $result = write_SMILES( [ $moiety ] ); is( $result, $case->[2] ); } Chemistry-OpenSMILES-0.9.0/t/14_write_disconnected.t0000644000200400020040000000074714516211770022005 0ustar andriusandrius#!/usr/bin/perl use strict; use warnings; use Chemistry::OpenSMILES::Writer qw(write_SMILES); use Graph::Undirected; use Test::More; plan tests => 2; my $graph = Graph::Undirected->new( refvertexed => 1 ); $graph->add_vertex( { symbol => 'C', number => 1 } ); $graph->add_vertex( { symbol => 'O', number => 2 } ); my $warning; local $SIG{__WARN__} = sub { $warning = $_[0] }; is( write_SMILES( [ $graph ] ), 'C' ); is( $warning, '1 unreachable atom(s) detected in moiety' . "\n" ); Chemistry-OpenSMILES-0.9.0/t/18_chirality_reference.t0000644000200400020040000000154114516211770022134 0ustar andriusandrius#!/usr/bin/perl use strict; use warnings; use Chemistry::OpenSMILES qw( is_chiral ); use Chemistry::OpenSMILES::Parser; use Test::More; my %cases = ( 'C[C@@](Br)(O)N' => 0, 'Br[C@@](N)(O)C' => 0, '[C@@](C)(Br)(O)N' => 1, '[C@@](Br)(N)(O)C' => 1, 'FC1C[C@](Br)(Cl)CCC1' => 2, '[C@]1(Br)(Cl)CCCC(F)C1' => 8, 'C1.[C@]1(Br)(Cl)O' => 0, 'C(CCCC1)[C@]1(Br)(Cl)' => 0, 'C([C@](Br)(Cl)O)C' => 0, ); plan tests => 3 * scalar keys %cases; for (sort keys %cases) { my $parser = Chemistry::OpenSMILES::Parser->new; my( $graph ) = $parser->parse( $_ ); my( $chiral_center ) = grep { is_chiral $_ } $graph->vertices; ok( defined $chiral_center ); ok( exists $chiral_center->{chirality_neighbours} ); is( $chiral_center->{chirality_neighbours}[0]{number}, $cases{$_} ); } Chemistry-OpenSMILES-0.9.0/t/24_square_planar_chirality.t0000644000200400020040000000173214516211770023032 0ustar andriusandrius#!/usr/bin/perl use strict; use warnings; use Chemistry::OpenSMILES::Parser; use Chemistry::OpenSMILES::Writer qw(write_SMILES); use Test::More; my @cases = ( [ 'N[C@SP1](Br)(O)C', 'N([C@SP1](Br)(O)(C))', 'C([C@SP1](O)(Br)(N))' ], [ 'N[C@SP2](Br)(O)C', 'N([C@SP2](Br)(O)(C))', 'C([C@SP2](O)(Br)(N))' ], [ 'N[C@SP3](Br)(O)C', 'N([C@SP3](Br)(O)(C))', 'C([C@SP3](O)(Br)(N))' ], ); plan tests => 2 * scalar @cases; for my $case (@cases) { my $parser; my @moieties; my $result; $parser = Chemistry::OpenSMILES::Parser->new; @moieties = $parser->parse( $case->[0], { raw => 1 } ); $result = write_SMILES( \@moieties ); is( $result, $case->[1] ); $result = write_SMILES( \@moieties, \&reverse_order ); is( $result, $case->[2] ); } sub reverse_order { my( $vertices ) = @_; my @sorted = sort { $vertices->{$b}{number} <=> $vertices->{$a}{number} } keys %$vertices; return $vertices->{shift @sorted}; } Chemistry-OpenSMILES-0.9.0/t/01_atoms.t0000644000200400020040000000145214516211770017242 0ustar andriusandrius#!/usr/bin/perl use strict; use warnings; use Chemistry::OpenSMILES::Parser; use Chemistry::OpenSMILES::Writer qw( write_SMILES ); use Test::More; my @cases = qw( C N Cl * [U] [Pb] [He] [CH4] [ClH] [Cl-] [Cu+2] [13CH4] [2H+] [238U] ); my %cases = ( '[*]' => '*', '[ClH1]' => '[ClH]', '[Cu++]' => '[Cu+2]', '[OH-1]' => '[OH-]', '[OH1-]' => '[OH-]', '[C@TH1]' => '[C@TH1]', '[C@TH2]' => '[C@TH2]', map { $_ => $_ } @cases, ); plan tests => 2 * scalar keys %cases; for (sort keys %cases) { my $parser = Chemistry::OpenSMILES::Parser->new; my( $graph ) = $parser->parse( $_, { raw => 1 } ); is( $graph->vertices, 1 ); is( join( '', map { write_SMILES( $_ ) } $graph->vertices ), $cases{$_} ); } Chemistry-OpenSMILES-0.9.0/t/19_lone_pairs.t0000644000200400020040000000233414516211770020263 0ustar andriusandrius#!/usr/bin/perl use strict; use warnings; use Chemistry::OpenSMILES::Parser; use Chemistry::OpenSMILES::Writer qw(write_SMILES); use Test::More; my @cases = ( [ '[C@](C)(N)(O)', '[C@](C)(N)(O)', 'O([C@](N)(C))' ], # Same as before, inverting enumeration direction: [ '[C@@](C)(N)(O)', '[C@@](C)(N)(O)', 'O([C@@](N)(C))' ], [ 'C[C@](O)(N)', 'C([C@](O)(N))', 'N([C@@](O)(C))' ], # Same as before, inverting enumeration direction: [ 'C[C@@](O)(N)', 'C([C@@](O)(N))', 'N([C@](O)(C))' ], ); plan tests => 2 * scalar @cases; for my $case (@cases) { my $parser; my @moieties; my $result; $parser = Chemistry::OpenSMILES::Parser->new; @moieties = $parser->parse( $case->[0] ); $result = write_SMILES( \@moieties ); is( drop_H( $result ), $case->[1] ); $result = write_SMILES( \@moieties, \&reverse_order ); is( drop_H( $result ), $case->[2] ); } sub drop_H { my( $smiles ) = @_; $smiles =~ s/\(\[H\]\)//g; $smiles =~ s/^\[H\]\((.+)\)$/$1/; return $smiles; } sub reverse_order { my( $vertices ) = @_; my @sorted = sort { $vertices->{$b}{number} <=> $vertices->{$a}{number} } keys %$vertices; return $vertices->{shift @sorted}; } Chemistry-OpenSMILES-0.9.0/t/10_validate.t0000644000200400020040000000165414516211770017714 0ustar andriusandrius#!/usr/bin/perl use strict; use warnings; use Chemistry::OpenSMILES; use Chemistry::OpenSMILES::Parser; use Test::More; my %cases = ( '[C@]' => 'chiral center C(0) has 0 bonds while at least 3 is required', 'C/C(\O)=C(/C)(\O)' => 'atom C(1) has 2 bonds of type \'\\\', cis/trans definitions must not conflict', 'C(Cl)(F)(O)' => 'atom C(0) has 4 distinct neighbours, but does not have a chiral setting', 'C11' => 'atom C(0) has bond to itself', 'C/C' => 'cis/trans bond is defined between atoms C(0) and C(1), but neither of them is attached to a double bond', ); plan tests => scalar keys %cases; for (sort keys %cases) { my $warning; local $SIG{__WARN__} = sub { $warning = $_[0] }; my $parser = Chemistry::OpenSMILES::Parser->new; my( $graph ) = $parser->parse( $_ ); Chemistry::OpenSMILES::_validate( $graph ); $warning =~ s/\n$// if defined $warning; is( $warning, $cases{$_} ); } Chemistry-OpenSMILES-0.9.0/t/11_validate_color.t0000644000200400020040000000300214516211770021100 0ustar andriusandrius#!/usr/bin/perl use strict; use warnings; use Chemistry::OpenSMILES qw( clean_chiral_centers ); use Chemistry::OpenSMILES::Parser; use Test::More; my %cases = ( 'C[C@](C)(C)(C)' => 'tetrahedral chiral setting for C(1) is not needed as not all 4 neighbours are distinct', 'C[C@](Cl)(F)(O)' => undef, 'C(Cl)(F)(O)' => 'atom C(0) has 4 distinct neighbours, but does not have a chiral setting', # Anomers must not loose chirality settings 'N[C@@]12NC(N[C@]2(NC(N1))N)' => undef, ); plan tests => 3 * scalar keys %cases; for (sort keys %cases) { my $warning; local $SIG{__WARN__} = sub { $warning = $_[0] }; my $parser = Chemistry::OpenSMILES::Parser->new; my( $graph ) = $parser->parse( $_ ); Chemistry::OpenSMILES::_validate( $graph, sub { return $_[0]->{symbol} } ); $warning =~ s/\n$// if defined $warning; is $warning, $cases{$_}; # Unnecessary chiral centers should be removed my @affected = clean_chiral_centers( $graph, sub { return $_[0]->{symbol} } ); is @affected != 0, defined $cases{$_} && $cases{$_} =~ /not needed/, "$_ - affected atoms"; # After removal, validation should pass undef $warning; Chemistry::OpenSMILES::_validate( $graph, sub { return $_[0]->{symbol} } ); $warning =~ s/\n$// if defined $warning; is !defined $warning, !defined $cases{$_} || @affected != 0, "$_ - validation"; } Chemistry-OpenSMILES-0.9.0/t/03_hcount.t0000644000200400020040000000217414516211770017423 0ustar andriusandrius#!/usr/bin/perl use strict; use warnings; use Chemistry::OpenSMILES::Parser; use Test::More; my %cases = ( 'C' => 5, 'C[U]' => 5, 'N' => 4, 'Cl' => 2, '[U]' => 1, '[Pb]' => 1, '[He]' => 1, '[*]' => 1, '[CH4]' => 5, '[ClH]' => 2, '[ClH1]' => 2, '[Cl-]' => 1, '[OH1-]' => 2, '[OH-1]' => 2, '[Cu+2]' => 1, '[Cu++]' => 1, '[13CH4]' => 5, '[2H+]' => 1, '[238U]' => 1, 'S' => 3, 'S[O]' => 3, 'S([O])([O])' => 3, 'S([O])([O])([O])' => 5, 'S([O])([O])([O])([O])' => 5, 'S([O])([O])([O])([O])([O])' => 7, 'O([O])' => 3, 'O([O])([O])' => 3, 'O([O])([O])([O])' => 4, 'CC' => 8, 'C-C' => 8, 'C=C' => 6, 'C#C' => 4, 'C$C' => 2, 'C1=CC=CC=C1' => 12, 'c1ccccc1' => 12, 'c1cncnc1' => 10, ); plan tests => scalar keys %cases; for (sort keys %cases) { my $parser = Chemistry::OpenSMILES::Parser->new; my( $graph ) = $parser->parse( $_ ); is( $graph->vertices, $cases{$_} ); } Chemistry-OpenSMILES-0.9.0/t/23_clean_chiral_centers.t0000644000200400020040000000224314516211770022251 0ustar andriusandrius#!/usr/bin/perl use strict; use warnings; use Chemistry::OpenSMILES qw(clean_chiral_centers); use Chemistry::OpenSMILES::Parser; use Chemistry::OpenSMILES::Stereo qw(chirality_to_pseudograph); use Chemistry::OpenSMILES::Writer qw(write_SMILES); use Test::More; eval 'use Graph::Nauty qw(orbits)'; plan skip_all => 'no Graph::Nauty' if $@; sub depict { my( $vertex ) = @_; if( ref $vertex && exists $vertex->{symbol} ) { return &write_SMILES; } else { return ''; } } my @orbits; sub orbit { my( $vertex ) = @_; for my $i (0..$#orbits) { return $i if grep { $_ == $vertex } @{$orbits[$i]}; } } my @cases = ( [ 'CC[C@](CO)(CCl)C', 0 ], [ 'CC[C@](CC)(CC)CC', 1 ], [ 'C[S@](O)(O)[O-]', 0 ], # FIXME: Something is off here, should be 1! ); plan tests => scalar @cases; for my $case (@cases) { my( $smiles, $changed ) = @$case; my $parser = Chemistry::OpenSMILES::Parser->new; my( $moiety ) = $parser->parse( $smiles ); my $copy = $moiety->copy; chirality_to_pseudograph( $copy ); @orbits = orbits( $copy, \&depict ); is scalar clean_chiral_centers( $moiety, \&orbit ), $changed, $smiles; } Chemistry-OpenSMILES-0.9.0/t/12_tetrahedral_chirality.t0000644000200400020040000000146214516211770022471 0ustar andriusandrius#!/usr/bin/perl use strict; use warnings; use Chemistry::OpenSMILES::Parser; use Chemistry::OpenSMILES::Writer qw(write_SMILES); use Test::More; my @cases = ( [ 'Br[C@H](Cl)(I)', 'Br([C@](Cl)(I)([H]))', '[H]([C@](I)(Cl)(Br))' ], ); plan tests => 2 * scalar @cases; for my $case (@cases) { my $parser; my @moieties; my $result; $parser = Chemistry::OpenSMILES::Parser->new; @moieties = $parser->parse( $case->[0] ); $result = write_SMILES( \@moieties ); is( $result, $case->[1] ); $result = write_SMILES( \@moieties, \&reverse_order ); is( $result, $case->[2] ); } sub reverse_order { my( $vertices ) = @_; my @sorted = sort { $vertices->{$b}{number} <=> $vertices->{$a}{number} } keys %$vertices; return $vertices->{shift @sorted}; } Chemistry-OpenSMILES-0.9.0/t/02_chains.t0000644000200400020040000000373514516211770017373 0ustar andriusandrius#!/usr/bin/perl use strict; use warnings; use List::Util qw(sum); use Chemistry::OpenSMILES::Parser; use Test::More; my %cases = ( 'CC' => [ 2, 1 ], 'CCO' => [ 3, 2 ], 'NCCCC' => [ 5, 4 ], 'CCCCN' => [ 5, 4 ], 'C=C' => [ 2, 1 ], 'C#N' => [ 2, 1 ], 'CC#CC' => [ 4, 3 ], 'CCC=O' => [ 4, 3 ], '[Rh-](Cl)(Cl)(Cl)(Cl)$[Rh-](Cl)(Cl)(Cl)Cl' => [ 10, 9 ], 'C-C' => [ 2, 1 ], 'CCC(CC)CO' => [ 7, 6 ], 'CC(C)C(=O)C(C)C' => [ 8, 7 ], 'OCC(CCC)C(C(C)C)CCC' => [ 13, 12 ], 'OS(=O)(=S)O' => [ 5, 4 ], 'C(C(C(C(C(C(C(C(C(C(C(C(C(C(C(C(C(C(C(C(C))))))))))))))))))))C' => [ 22, 21 ], 'C1CCCCC1' => [ 6, 6 ], 'N1CC2CCCCC2CC1' => [ 10, 11 ], 'C=1CCCCC=1' => [ 6, 6 ], 'C1CCCCC1C1CCCCC1' => [ 12, 13 ], 'C1CCCCC1C2CCCCC2' => [ 12, 13 ], 'C0CCCCC0' => [ 6, 6 ], 'C%25CCCCC%25' => [ 6, 6 ], 'C1CCCCC%01' => [ 6, 6 ], 'C12(CCCCC1)CCCCC2' => [ 11, 12 ], # The following case is not allowed by OpenSMILES specification, # however, it is easier to support it than forbid. 'C(CCCCC1)12CCCCC2' => [ 11, 12 ], '[Na+].[Cl-]' => [ 2, 2, 0 ], 'c1cc(O.NCCO)ccc1' => [ 2, 11, 10 ], 'Oc1cc(.NCCO)ccc1' => [ 2, 11, 10 ], 'C1.C1' => [ 1, 2, 1 ], 'C1.C12.C2' => [ 1, 3, 2 ], 'c1c2c3c4cc1.Br2.Cl3.Cl4' => [ 1, 9, 9 ], 'C(C(C1))C1' => [ 4, 4 ], # FIXME: self-bond is obviously incorrect and should be reported. 'C11' => [ 1, 1 ], ); plan tests => 3 * scalar keys %cases; for my $case (sort keys %cases) { my $parser = Chemistry::OpenSMILES::Parser->new; my @graphs = $parser->parse( $case, { raw => 1 } ); is( scalar @graphs, @{$cases{$case}} == 3 ? $cases{$case}->[0] : 1 ); is( sum( map { scalar $_->vertices } @graphs ), $cases{$case}->[-2] ); is( sum( map { scalar $_->edges } @graphs ), $cases{$case}->[-1] ); } Chemistry-OpenSMILES-0.9.0/t/16_unsupported_chirality.t0000644000200400020040000000165414516211770022571 0ustar andriusandrius#!/usr/bin/perl use strict; use warnings; use Chemistry::OpenSMILES::Parser; use Chemistry::OpenSMILES::Writer qw(write_SMILES); use Test::More; my @cases = ( [ 'N[C@](Br)(O)C', 'N([C@](Br)(O([H]))(C([H])([H])([H])))([H])([H])', undef ], [ 'NC(Br)=[C@]=C(O)C', 'N(C(Br)(=C(=C(O([H]))(C([H])([H])([H])))))([H])([H])', undef ], [ 'N[C@](Br)(O)(C)(Cl)', 'N(C(Br)(O([H]))(C([H])([H])([H]))(Cl))([H])([H])', 'chirality \'@\' observed for atom with 5 neighbours, can only process tetrahedral chiral centers with possible lone pairs' ], ); plan tests => 2 * scalar @cases; for my $case (@cases) { my $warning; local $SIG{__WARN__} = sub { $warning = $_[0] }; my $parser = Chemistry::OpenSMILES::Parser->new; my @moieties = $parser->parse( $case->[0] ); is( write_SMILES( \@moieties ), $case->[1] ); $warning =~ s/\n$// if $warning; is( $warning, $case->[2] ); } Chemistry-OpenSMILES-0.9.0/t/07_cistrans.t0000644000200400020040000000247514516211770017761 0ustar andriusandrius#!/usr/bin/perl use strict; use warnings; use Chemistry::OpenSMILES::Parser; use Chemistry::OpenSMILES::Writer qw(write_SMILES); use Test::More; my @cases = ( [ 'Br/C=C/F', 'Br(/C(=C(/F)))', 'F(\C(=C(\Br)))' ], [ 'C(\Br)=C/F', 'C(\Br)(=C(/F))', 'F(\C(=C(\Br)))' ], [ 'Br\C=C/F', 'Br(\C(=C(/F)))', 'F(\C(=C(/Br)))' ], [ 'C(/Br)=C/F', 'C(/Br)(=C(/F))', 'F(\C(=C(/Br)))' ], # Adapted from COD entry 1100225: [ 'Cl/C(=C\1COCN1)C', 'Cl(/C(=C\1(C(O(C(N/1)))))(C))', 'C(C(=C1(\N(C(O(C1)))))(\Cl))' ], # The following two cases are synonymous: [ 'C\1CCOC/1=C/O', 'C\1(C(C(O(C/1(=C(/O))))))', 'O(\C(=C/1(O(C(C(C\1))))))' ], [ 'C1CCOC/1=C/O', 'C\1(C(C(O(C/1(=C(/O))))))', 'O(\C(=C/1(O(C(C(C\1))))))' ], ); plan tests => 2 * scalar @cases; for my $case (@cases) { my $parser; my @moieties; my $result; $parser = Chemistry::OpenSMILES::Parser->new; @moieties = $parser->parse( $case->[0], { raw => 1 } ); $result = write_SMILES( \@moieties ); is( $result, $case->[1] ); $result = write_SMILES( \@moieties, \&reverse_order ); is( $result, $case->[2] ); } sub reverse_order { my( $vertices ) = @_; my @sorted = sort { $vertices->{$b}{number} <=> $vertices->{$a}{number} } keys %$vertices; return $vertices->{shift @sorted}; } Chemistry-OpenSMILES-0.9.0/t/06_write.t0000644000200400020040000000274714516211770017266 0ustar andriusandrius#!/usr/bin/perl use strict; use warnings; use Chemistry::OpenSMILES::Parser; use Chemistry::OpenSMILES::Writer qw(write_SMILES); use Test::More; my @cases = ( [ 'C', 'C' ], [ 'C=C', 'C(=C)' ], [ 'C=1=C=C=C=1', 'C=1(=C(=C(=C=1)))' ], [ 'C#C.c1ccccc1', 'C(#C).c:1(:c(:c(:c(:c(:c:1)))))' ], [ 'C1CC2CCCCC2CC1', 'C1(C(C2(C(C(C(C(C2(C(C1)))))))))' ], # A strange way to write fused rings: [ 'C1(CCCCC11)(CCCC1)', 'C12(C(C(C(C(C1(C(C(C(C2)))))))))' ], # Single bonds between two aromatic atoms must be explicitly represented: [ 'c1cc-ccc1', 'c:1(:c(:c(-c(:c(:c:1)))))' ], # Chirality information is preserved: [ 'N[C@](Br)(O)C', 'N([C@](Br)(O)(C))' ], [ 'N[C@@](Br)(O)C', 'N([C@@](Br)(O)(C))' ], # A regression test for previously incorrectly identified aromatic bond: [ 'c1(c(cccc1)F)C(=O)[O-]', 'c:1(:c(:c(:c(:c(:c:1))))(F))(C(=O)([O-]))' ], # Cyclooctatetraene adapted from OpenSMILES v1.0 specification: [ 'C/1=C/C=C\C=C/C=C\1', 'C/1(=C(/C(=C(\C(=C(/C(=C\1)))))))' ], ); plan tests => 2 * scalar @cases; for my $case (@cases) { my $parser; my @moieties; my $result; $parser = Chemistry::OpenSMILES::Parser->new; @moieties = $parser->parse( $case->[0], { raw => 1 } ); $result = write_SMILES( \@moieties ); is( $result, $case->[1] ); $parser = Chemistry::OpenSMILES::Parser->new; @moieties = $parser->parse( $result, { raw => 1 } ); $result = write_SMILES( \@moieties ); is( $result, $case->[1] ); } Chemistry-OpenSMILES-0.9.0/lib/0000775000200400020040000000000014516211770015735 5ustar andriusandriusChemistry-OpenSMILES-0.9.0/lib/Chemistry/0000775000200400020040000000000014516211770017704 5ustar andriusandriusChemistry-OpenSMILES-0.9.0/lib/Chemistry/OpenSMILES.pm0000644000200400020040000004226214516211770022064 0ustar andriusandriuspackage Chemistry::OpenSMILES; use strict; use warnings; use 5.0100; # ABSTRACT: OpenSMILES format reader and writer our $VERSION = '0.9.0'; # VERSION require Exporter; our @ISA = qw( Exporter ); our @EXPORT_OK = qw( %bond_order_to_symbol %bond_symbol_to_order clean_chiral_centers is_aromatic is_chiral is_cis_trans_bond is_double_bond is_ring_atom is_ring_bond is_single_bond is_triple_bond mirror %normal_valence toggle_cistrans ); use Graph::Traversal::BFS; use List::Util qw( all any none ); sub is_chiral($); sub is_chiral_planar($); sub is_chiral_tetrahedral($); sub mirror($); sub toggle_cistrans($); our %normal_valence = ( B => [ 3 ], C => [ 4 ], N => [ 3, 5 ], O => [ 2 ], P => [ 3, 5 ], S => [ 2, 4, 6 ], F => [ 1 ], Cl => [ 1 ], Br => [ 1 ], I => [ 1 ], c => [ 3 ], # Not from OpenSMILES specification ); our %bond_order_to_symbol = ( 1 => '-', 1.5 => ':', 2 => '=', 3 => '#', 4 => '$', ); our %bond_symbol_to_order = ( '-' => 1, ':' => 1.5, '=' => 2, '#' => 3, '$' => 4, ); # Removes chiral setting from allenal, square planar or tetrahedral chiral centers if deemed unimportant. # For allenal and tetrahedral arrangements this means situations with less than four distinct neighbours. # For square planar arrangements this means situations when all neighbours are the same. # Only chiral centers with four atoms are affected, thus three-atom centers (implying lone pairs) are left untouched. # Returns the affected atoms. # # TODO: check other chiral centers sub clean_chiral_centers($$) { my( $moiety, $color_sub ) = @_; my @affected; for my $atom ($moiety->vertices) { next unless is_chiral_allenal( $atom ) || is_chiral_planar( $atom ) || is_chiral_tetrahedral( $atom ); # Anomers must not loose chirality settings in any way next if is_ring_atom( $moiety, $atom, scalar $moiety->edges ); my $hcount = exists $atom->{hcount} ? $atom->{hcount} : 0; my @neighbours = $moiety->neighbours( $atom ); if( is_chiral_allenal( $atom ) ) { @neighbours = grep { $_ != $atom } map { $moiety->neighbours( $_ ) } @neighbours; } next if @neighbours + $hcount != 4; my %colors; for (@neighbours, ( { symbol => 'H' } ) x $hcount) { $colors{$color_sub->( $_ )}++; } if( is_chiral_planar( $atom ) ) { # Chiral planar center markers make sense even if only two types of atoms are there. next if scalar keys %colors > 2; next if scalar keys %colors == 2 && all { $_ == 2 } values %colors; } else { next if scalar keys %colors == 4; } delete $atom->{chirality}; push @affected, $atom; } return @affected; } sub is_aromatic($) { my( $atom ) = @_; return $atom->{symbol} ne ucfirst $atom->{symbol}; } sub is_chiral($) { my( $what ) = @_; if( ref $what eq 'HASH' ) { # Single atom return exists $what->{chirality}; } else { # Graph representing moiety return any { is_chiral( $_ ) } $what->vertices; } } sub is_chiral_allenal($) { my( $what ) = @_; if( ref $what eq 'HASH' ) { # Single atom return $what->{chirality} && $what->{chirality} =~ /^\@AL[12]$/; } else { # Graph representing moiety return any { is_chiral_allenal( $_ ) } $what->vertices; } } sub is_chiral_planar($) { my( $what ) = @_; if( ref $what eq 'HASH' ) { # Single atom return $what->{chirality} && $what->{chirality} =~ /^\@SP[123]$/; } else { # Graph representing moiety return any { is_chiral_planar( $_ ) } $what->vertices; } } sub is_chiral_tetrahedral($) { my( $what ) = @_; if( ref $what eq 'HASH' ) { # Single atom # CAVEAT: will fail for allenal configurations of @/@@ in raw mode return $what->{chirality} && $what->{chirality} =~ /^@@?$/; } else { # Graph representing moiety return any { is_chiral_tetrahedral( $_ ) } $what->vertices; } } sub is_cis_trans_bond { my( $moiety, $a, $b ) = @_; return $moiety->has_edge_attribute( $a, $b, 'bond' ) && $moiety->get_edge_attribute( $a, $b, 'bond' ) =~ /^[\\\/]$/; } sub is_double_bond { my( $moiety, $a, $b ) = @_; return $moiety->has_edge_attribute( $a, $b, 'bond' ) && $moiety->get_edge_attribute( $a, $b, 'bond' ) eq '='; } # An atom is deemed to be a ring atom if any of its bonds is a ring bond. sub is_ring_atom { my( $moiety, $atom, $max_length ) = @_; return '' unless $moiety->degree( $atom ) > 1; return any { is_ring_bond( $moiety, $atom, $_, $max_length ) } $moiety->neighbours( $atom ); } # A bond is deemed to be a ring bond if there is an alternative path # joining its atoms not including the bond in consideration and this # alternative path is not longer than 7 bonds. This is based on # O'Boyle (2012) saying that Open Babel SMILES writer does not output # cis/trans markers for double bonds in rings of size 8 or less due to # them implicilty being cis bonds. # # If maximum ring size is given negative, ring size is not limited. sub is_ring_bond { my( $moiety, $a, $b, $max_length ) = @_; $max_length = 7 unless $max_length; # A couple of shortcuts to reduce the complexity return '' if any { $moiety->degree( $_ ) == 1 } ( $a, $b ); return '' if scalar( $moiety->vertices ) > scalar( $moiety->edges ); if( $max_length < 0 ) { # Due to the issue in Graph, bridges() returns strings instead of real objects. # Graph issue: https://github.com/graphviz-perl/Graph/issues/29 my %vertices_by_name = map { $_ => $_ } $moiety->vertices; return none { ( $_->[0] == $a && $_->[1] == $b ) || ( $_->[0] == $b && $_->[1] == $a ) } map { [ map { $vertices_by_name{$_} } @$_ ] } $moiety->bridges; } my $copy = $moiety->copy; $copy->delete_edge( $a, $b ); my %distance = ( $a => 0 ); my $record_length = sub { # Record number of bonds between $a and any other vertex my( $u, $v ) = @_; my @seen = grep { exists $distance{$_} } ( $u, $v ); return '' if @seen != 1; # Can this be 0? my $seen = shift @seen; my( $unseen ) = grep { !exists $distance{$_} } ( $u, $v ); $distance{$unseen} = $distance{$seen} + 1; }; my $operations = { start => sub { return $a }, tree_edge => $record_length, }; my $traversal = Graph::Traversal::BFS->new( $copy, %$operations ); $traversal->bfs; # $distance{$b} is the distance in bonds. In 8-member rings adjacent # ring atoms have distance of 7 bonds. return exists $distance{$b} && $distance{$b} <= $max_length; } sub is_single_bond { my( $moiety, $a, $b ) = @_; return !$moiety->has_edge_attribute( $a, $b, 'bond' ) || $moiety->get_edge_attribute( $a, $b, 'bond' ) eq '-'; } sub is_triple_bond { my( $moiety, $a, $b ) = @_; return $moiety->has_edge_attribute( $a, $b, 'bond' ) && $moiety->get_edge_attribute( $a, $b, 'bond' ) eq '#'; } sub mirror($) { my( $what ) = @_; if( ref $what eq 'HASH' ) { # Single atom # FIXME: currently dealing only with tetrahedral chiral centers if( is_chiral_tetrahedral( $what ) ) { $what->{chirality} = $what->{chirality} eq '@' ? '@@' : '@'; } } else { for ($what->vertices) { mirror( $_ ); } } } sub toggle_cistrans($) { return $_[0] eq '/' ? '\\' : '/'; } # CAVEAT: requires output from non-raw parsing due issue similar to GH#2 sub _validate($@) { my( $moiety, $color_sub ) = @_; for my $atom (sort { $a->{number} <=> $b->{number} } $moiety->vertices) { # TODO: AL chiral centers also have to be checked if( is_chiral_tetrahedral( $atom ) ) { if( $moiety->degree($atom) < 3 ) { # FIXME: there should be a strict mode to forbid lone pairs # FIXME: tetrahedral allenes are false-positives warn sprintf 'chiral center %s(%d) has %d bonds while ' . 'at least 3 is required' . "\n", $atom->{symbol}, $atom->{number}, $moiety->degree($atom); } elsif( $moiety->degree($atom) == 4 && $color_sub ) { my %colors = map { ($color_sub->( $_ ) => 1) } $moiety->neighbours($atom); if( scalar keys %colors != 4 && !is_ring_atom( $moiety, $atom, scalar $moiety->edges ) ) { warn sprintf 'tetrahedral chiral setting for %s(%d) ' . 'is not needed as not all 4 neighbours ' . 'are distinct' . "\n", $atom->{symbol}, $atom->{number}; } } } # Warn about unmarked tetrahedral chiral centers if( !is_chiral( $atom ) && $moiety->degree( $atom ) == 4 ) { my $color_sub_local = $color_sub; if( !$color_sub_local ) { $color_sub_local = sub { return $_[0]->{symbol} }; } my %colors = map { ($color_sub_local->( $_ ) => 1) } $moiety->neighbours($atom); if( scalar keys %colors == 4 ) { warn sprintf 'atom %s(%d) has 4 distinct neighbours, ' . 'but does not have a chiral setting' . "\n", $atom->{symbol}, $atom->{number}; } } } # FIXME: establish deterministic order for my $bond ($moiety->edges) { my( $A, $B ) = sort { $a->{number} <=> $b->{number} } @$bond; if( $A eq $B ) { warn sprintf 'atom %s(%d) has bond to itself' . "\n", $A->{symbol}, $A->{number}; } if( $moiety->has_edge_attribute( @$bond, 'bond' ) ) { my $bond_type = $moiety->get_edge_attribute( @$bond, 'bond' ); if( $bond_type eq '=' ) { # Test cis/trans bonds # FIXME: Not sure how to check which definition belongs to # which of the double bonds. See COD entry 1547257. for my $atom (@$bond) { my %bond_types = _neighbours_per_bond_type( $moiety, $atom ); foreach ('/', '\\') { if( $bond_types{$_} && @{$bond_types{$_}} > 1 ) { warn sprintf 'atom %s(%d) has %d bonds of type \'%s\', ' . 'cis/trans definitions must not conflict' . "\n", $atom->{symbol}, $atom->{number}, scalar @{$bond_types{$_}}, $_; } } } } elsif( $bond_type =~ /^[\\\/]$/ ) { # Test if next to a double bond. # FIXME: Yields false-positives for delocalised bonds, # see COD entry 1501863. # FIXME: What about triple bond? See COD entry 4103591. my %bond_types; for my $atom (@$bond) { my %bond_types_now = _neighbours_per_bond_type( $moiety, $atom ); for my $key (keys %bond_types_now) { push @{$bond_types{$key}}, @{$bond_types_now{$key}}; } } if( !$bond_types{'='} ) { warn sprintf 'cis/trans bond is defined between atoms ' . '%s(%d) and %s(%d), but neither of them ' . 'is attached to a double bond' . "\n", $A->{symbol}, $A->{number}, $B->{symbol}, $B->{number}; } } } } # TODO: SP, TB, OH chiral centers } sub _neighbours_per_bond_type { my( $moiety, $atom ) = @_; my %bond_types; for my $neighbour ($moiety->neighbours($atom)) { my $bond_type; if( $moiety->has_edge_attribute( $atom, $neighbour, 'bond' ) ) { $bond_type = $moiety->get_edge_attribute( $atom, $neighbour, 'bond' ); } else { $bond_type = ''; } if( $bond_type =~ /^[\\\/]$/ && $atom->{number} > $neighbour->{number} ) { $bond_type = toggle_cistrans $bond_type; } push @{$bond_types{$bond_type}}, $neighbour; } return %bond_types; } 1; __END__ =pod =head1 NAME Chemistry::OpenSMILES - OpenSMILES format reader and writer =head1 SYNOPSIS use Chemistry::OpenSMILES::Parser; my $parser = Chemistry::OpenSMILES::Parser->new; my @moieties = $parser->parse( 'C#C.c1ccccc1' ); $\ = "\n"; for my $moiety (@moieties) { # $moiety is a Graph::Undirected object print scalar $moiety->vertices; print scalar $moiety->edges; } use Chemistry::OpenSMILES::Writer qw(write_SMILES); print write_SMILES( \@moieties ); =head1 DESCRIPTION Chemistry::OpenSMILES provides support for SMILES chemical identifiers conforming to OpenSMILES v1.0 specification (L). Chemistry::OpenSMILES::Parser reads in SMILES strings and returns them parsed to arrays of L objects. Each atom is represented by a hash. Chemistry::OpenSMILES::Writer performs the inverse operation. Generated SMILES strings are by no means optimal. =head2 Molecular graph Disconnected parts of a compound are represented as separate L objects. Atoms are represented as vertices, and bonds are represented as edges. =head3 Atoms Atoms, or vertices of a molecular graph, are represented as hash references: { "symbol" => "C", "isotope" => 13, "chirality" => "@@", "hcount" => 3, "charge" => 1, "class" => 0, "number" => 0, } Except for C, C and C, all keys of hash are optional. Per OpenSMILES specification, default values for C and C are 0. For chiral atoms, the order of its neighbours in input is preserved in an array added as value for C key of the atom hash. =head3 Bonds Bonds, or edges of a molecular graph, rely completely on L internal representation. Bond orders other than single (C<->, which is also a default) are represented as values of edge attribute C. They correspond to the symbols used in OpenSMILES specification. =head2 Options C accepts the following options for key-value pairs in an anonymous hash for its second parameter: =over =item C In OpenSMILES specification the number of attached hydrogen atoms for atoms in square brackets is limited to 9. IUPAC SMILES+ has increased this number to 99. With the value of C the parser could be instructed to allow other than 1 digit for attached hydrogen count. =item C With C set to anything evaluating to true, the parser will not convert neither implicit nor explicit hydrogen atoms in square brackets to atom hashes of their own. Moreover, it will not attempt to unify the representations of chirality. It should be noted, though, that many of subroutines of Chemistry::OpenSMILES expect non-raw data structures, thus processing raw output may produce distorted results. =back =head1 CAVEATS Element symbols in square brackets are not limited to the ones known to chemistry. Currently any single or two-letter symbol is allowed. Deprecated charge notations (C<--> and C<++>) are supported. OpenSMILES specification mandates a strict order of ring bonds and branches: branched_atom ::= atom ringbond* branch* Chemistry::OpenSMILES::Parser supports both the mandated, and inverted structure, where ring bonds follow branch descriptions. Whitespace is not supported yet. SMILES descriptors must be cleaned of it before attempting reading with Chemistry::OpenSMILES::Parser. The derivation of implicit hydrogen counts for aromatic atoms is not unambiguously defined in the OpenSMILES specification. Thus only aromatic carbon is accounted for as if having valence of 3. Chiral atoms with three neighbours are interpreted as having a lone pair of electrons as the fourth chiral neighbour. The lone pair is always understood as being the second in the order of neighbour enumeration, except when the atom with the lone pair starts a chain. In that case lone pair is the first. =head1 SEE ALSO perl(1) =head1 AUTHORS Andrius Merkys, Emerkys@cpan.orgE =cut Chemistry-OpenSMILES-0.9.0/lib/Chemistry/OpenSMILES/0000775000200400020040000000000014516211770021522 5ustar andriusandriusChemistry-OpenSMILES-0.9.0/lib/Chemistry/OpenSMILES/Parser.yp0000644000200400020040000003614614516211770023340 0ustar andriusandrius# Header section %{ use warnings; use 5.0100; use Chemistry::OpenSMILES qw( %bond_symbol_to_order is_aromatic is_chiral %normal_valence toggle_cistrans ); use Graph::Undirected; use List::Util qw( any sum ); %} %% # Rules section # The top-level 'filter' rule smiles: chain ; chain: atom { my $g = Graph::Undirected->new( refvertexed => 1 ); $g->add_vertex( $_[1] ); push @{$_[0]->{USER}{GRAPHS}}, $g; $_[1]->{graph} = $g; $_[1]->{index} = @{$_[0]->{USER}{GRAPHS}}-1; $_[1]->{first_of_chain} = 1; return { first => $_[1], last => $_[1] }; } | chain atom { $_[2]->{graph} = $_[1]->{last}{graph}; $_[2]->{index} = $_[1]->{last}{index}; $_[2]->{graph}->add_edge( $_[1]->{last}, $_[2] ); if( is_aromatic $_[1]->{last} && is_aromatic $_[2] ) { $_[2]->{graph}->set_edge_attribute( $_[1]->{last}, $_[2], 'bond', ':' ); } delete $_[2]->{first_of_chain}; _push_chirality_neighbour( $_[1]->{last}, $_[2] ); _push_chirality_neighbour( $_[2], $_[1]->{last} ); $_[1]->{last} = $_[2]; return $_[1]; } | chain bond atom { $_[3]->{graph} = $_[1]->{last}{graph}; $_[3]->{index} = $_[1]->{last}{index}; if( $_[2] ne '-' ) { $_[3]->{graph}->set_edge_attribute( $_[1]->{last}, $_[3], 'bond', $_[2] ); } else { $_[3]->{graph}->add_edge( $_[1]->{last}, $_[3] ); } delete $_[3]->{first_of_chain}; _push_chirality_neighbour( $_[1]->{last}, $_[3] ); _push_chirality_neighbour( $_[3], $_[1]->{last} ); $_[1]->{last} = $_[3]; return $_[1]; } | chain '.' atom { my $g = Graph::Undirected->new( refvertexed => 1 ); $g->add_vertex( $_[3] ); push @{$_[0]->{USER}{GRAPHS}}, $g; $_[3]->{graph} = $g; $_[3]->{index} = @{$_[0]->{USER}{GRAPHS}}-1; $_[3]->{first_of_chain} = 1; return { first => $_[3], last => $_[3] }; } | chain '(' chain ')' { if( $_[1]->{last}{index} != $_[3]->{first}{index} ) { $_[0]->_merge_graphs( $_[1]->{last}{index}, $_[3]->{first}{index} ); } $_[1]->{last}{graph}->add_edge( $_[1]->{last}, $_[3]->{first} ); if( is_aromatic $_[1]->{last} && is_aromatic $_[3]->{first} ) { $_[1]->{last}{graph}->set_edge_attribute( $_[1]->{last}, $_[3]->{first}, 'bond', ':' ); } delete $_[3]->{first}{first_of_chain}; _push_chirality_neighbour( $_[1]->{last}, $_[3]->{first} ); _unshift_chirality_neighbour( $_[3]->{first}, $_[1]->{last} ); return $_[1]; } | chain '(' bond chain ')' { if( $_[1]->{last}{index} != $_[4]->{first}{index} ) { $_[0]->_merge_graphs( $_[1]->{last}{index}, $_[4]->{first}{index} ); } if( $_[3] ne '-' ) { $_[1]->{last}{graph}->set_edge_attribute( $_[1]->{last}, $_[4]->{first}, 'bond', $_[3] ); } else { $_[1]->{last}{graph}->add_edge( $_[1]->{last}, $_[4]->{first} ); } delete $_[4]->{first}{first_of_chain}; _push_chirality_neighbour( $_[1]->{last}, $_[4]->{first} ); _unshift_chirality_neighbour( $_[4]->{first}, $_[1]->{last} ); return $_[1]; } | chain '(' '.' chain ')' # According to the specification of OpenSMILES, ring bonds are # allowed only before the branch enumeration. However, I think this # is too strict. | chain ringbond { $_[0]->_add_ring_bond( $_[1]->{last}, $_[2] ); return $_[1]; } | chain bond ringbond { $_[0]->_add_ring_bond( $_[1]->{last}, $_[3], $_[2] ); return $_[1]; } ; bond: '-' | '=' | '#' | '$' | ':' | '/' | '\\' ; %% # Footer section sub _Error { my( $self ) = @_; close $self->{USER}{FILEIN} if $self->{USER}{FILEIN}; if( ${$self->{TOKEN}} eq '' && grep { defined $_ && !ref $_ && $_ eq '(' } map { $_->[1] } @{$self->{STACK}} ) { die "$0: syntax error: missing closing parenthesis.\n"; } if( ${$self->{TOKEN}} eq ')' ) { die "$0: syntax error: unbalanced parentheses.\n"; } my $msg = "$0: syntax error at position $self->{USER}{CHARNO}"; if( $self->YYData->{INPUT} ) { $self->YYData->{INPUT} =~ s/\n$//; die "$msg: '" . $self->YYData->{INPUT} . "'.\n"; } else { die "$msg.\n"; } } sub _Lexer { my( $self ) = @_; # If the line is empty and the input is originating from the file, # another line is read. if( !$self->YYData->{INPUT} && $self->{USER}{FILEIN} ) { my $filein = $self->{USER}{FILEIN}; $self->YYData->{INPUT} = <$filein>; $self->{USER}{CHARNO} = 0; } if( $self->YYData->{INPUT} =~ s/^(\s+)// ) { $self->{USER}{CHARNO} += length $1; } my $hcount_re = 'H[0-9]?'; if( defined $self->{USER}{OPTIONS}{max_hydrogen_count_digits} ) { $hcount_re = sprintf 'H[0-9]{0,%d}', $self->{USER}{OPTIONS}{max_hydrogen_count_digits}; } # Bracket atoms if( $self->YYData->{INPUT} =~ s/^\[ (?[0-9]+)? (?[A-Za-z][a-z]?|\*) (?@( (TH|AL)[12] | SP [123] | (TB|OH)[0-9]{1,2} | @? ))? (? $hcount_re)? (?--|\+\+|[-+][0-9]{0,2})? (:(?[0-9]+))? \]//x ) { my $atom = { %+, number => $self->{USER}{ATOMNO} }; $self->{USER}{ATOMNO} ++; $self->{USER}{CHARNO} += length $&; if( $atom->{charge} ) { $atom->{charge} =~ s/^([-+])$/${1}1/; $atom->{charge} =~ s/^([-+])\1$/${1}2/; $atom->{charge} = int $atom->{charge}; } if( $atom->{hcount} ) { $atom->{hcount} =~ s/^H//; $atom->{hcount} = $atom->{hcount} ? int $atom->{hcount} : 1; } else { $atom->{hcount} = 0; } if( $atom->{isotope} ) { $atom->{isotope} = int $atom->{isotope}; } # Atom class is an arbitrary number, 0 by default $atom->{class} = exists $atom->{class} ? int $atom->{class} : 0; return ( 'atom', $atom ); } # Bracketless atoms if( $self->YYData->{INPUT} =~ s/^(Br|Cl|[BCINOPSFbcnops*])// ) { my $atom = { symbol => $1, class => 0, number => $self->{USER}{ATOMNO} }; $self->{USER}{ATOMNO} ++; $self->{USER}{CHARNO} += length $&; return ( 'atom', $atom ); } # Ring bonds if( $self->YYData->{INPUT} =~ s/^%([0-9]{2})// || $self->YYData->{INPUT} =~ s/^([0-9])// ) { $self->{USER}{CHARNO} += length $&; return ( 'ringbond', int $1 ); } my $char = substr( $self->YYData->{INPUT}, 0, 1 ); if( $char ne '' ) { $self->YYData->{INPUT} = substr( $self->YYData->{INPUT}, 1 ); } $self->{USER}{CHARNO} ++; return( $char, $char ); } sub parse { my( $self, $string, $options ) = @_; $options = {} unless $options; $self->YYData->{INPUT} = $string; $self->{USER}{GRAPHS} = []; $self->{USER}{RINGBONDS} = {}; $self->{USER}{ATOMNO} = 0; $self->{USER}{CHARNO} = 0; $self->{USER}{OPTIONS} = $options; $self->YYParse( yylex => \&_Lexer, yyerror => \&_Error, yydebug => $options->{debug} ); if( scalar keys %{$self->{USER}{RINGBONDS}} ) { die "$0: unclosed ring bond(s) detected: " . join( ', ', sort { $a <=> $b } keys %{$self->{USER}{RINGBONDS}} ) . ".\n"; } my @graphs = grep { defined } @{$self->{USER}{GRAPHS}}; for my $graph (@graphs) { for my $atom (sort { $a->{number} <=> $b->{number} } $graph->vertices) { delete $atom->{graph}; delete $atom->{index}; if( !$options->{raw} ) { # Promote implicit hydrogen atoms into explicit ones if( !exists $atom->{hcount} ) { next if !exists $normal_valence{$atom->{symbol}}; my $degree = sum map { $_ ne ':' && exists $bond_symbol_to_order{$_} ? $bond_symbol_to_order{$_} : 1 } map { $graph->has_edge_attribute( $atom, $_, 'bond' ) ? $graph->get_edge_attribute( $atom, $_, 'bond' ) : '-' } $graph->neighbours( $atom ); $degree = 0 unless $degree; my( $valence ) = grep { $degree <= $_ } @{$normal_valence{$atom->{symbol}}}; next if !defined $valence; $atom->{hcount} = $valence - $degree; } for (1..$atom->{hcount}) { my $hydrogen = { symbol => 'H', class => 0, number => $self->{USER}{ATOMNO} }; $graph->add_edge( $atom, $hydrogen ); $self->{USER}{ATOMNO} ++; if( $atom->{first_of_chain} ) { _unshift_chirality_neighbour( $atom, $hydrogen ); } else { _push_chirality_neighbour( $atom, $hydrogen ); } } delete $atom->{hcount}; # Unify the representation of chirality if( is_chiral $atom ) { if( $atom->{chirality} =~ /^@@?$/ && $graph->degree( $atom ) == 2 ) { $atom->{chirality} =~ s/@+/'@AL' . length $&/e; } $atom->{chirality} =~ s/^\@TH1$/@/; $atom->{chirality} =~ s/^\@TH2$/@@/; } # Adjust chirality for centers having lone pairs if( is_chiral $atom && $atom->{first_of_chain} && $atom->{chirality} =~ /^@@?$/ && $atom->{chirality_neighbours} && scalar @{$atom->{chirality_neighbours}} == 3 ) { $atom->{chirality} = $atom->{chirality} eq '@' ? '@@' : '@'; } } delete $atom->{first_of_chain}; } } return @graphs; } sub _add_ring_bond { my( $self, $atom, $ring_bond, $bond ) = @_; if( $self->{USER}{RINGBONDS}{$ring_bond} ) { $self->_merge_graphs( $self->{USER}{RINGBONDS}{$ring_bond}{atom}{index}, $atom->{index} ); if( $bond && $self->{USER}{RINGBONDS}{$ring_bond}{bond} && (($bond !~ /^[\\\/]$/ && $bond ne $self->{USER}{RINGBONDS}{$ring_bond}{bond}) || ($bond eq '\\' && $self->{USER}{RINGBONDS}{$ring_bond}{bond} ne '/') || ($bond eq '/' && $self->{USER}{RINGBONDS}{$ring_bond}{bond} ne '\\')) ) { die "$0: ring bond types for ring bond $ring_bond do not match.\n"; } ( $bond ) = grep { defined } ( $self->{USER}{RINGBONDS}{$ring_bond}{bond}, $bond ); if( $bond && $bond =~ /^[\\\/]$/ && !defined $self->{USER}{RINGBONDS}{$ring_bond}{bond} ) { # If cis/trans marker is not specified when cis/trans bond is # seen first, it has to be inverted: $bond = toggle_cistrans $bond; } my $ring_atom = $self->{USER}{RINGBONDS}{$ring_bond}{atom}; if( !$bond && is_aromatic $ring_atom && is_aromatic $atom ) { $bond = ':'; } if( $bond && $bond ne '-' ) { $atom->{graph}->set_edge_attribute( $ring_atom, $atom, 'bond', $bond ); } else { $atom->{graph}->add_edge( $ring_atom, $atom ); } delete $self->{USER}{RINGBONDS}{$ring_bond}; if( is_chiral $ring_atom && $ring_atom->{chirality_neighbours} ) { my( $pos ) = grep { !ref $ring_atom->{chirality_neighbours}[$_] && $ring_atom->{chirality_neighbours}[$_] == $ring_bond } 0..$#{$ring_atom->{chirality_neighbours}}; $ring_atom->{chirality_neighbours}[$pos] = $atom if defined $pos; } _push_chirality_neighbour( $atom, $ring_atom ); } else { $self->{USER}{RINGBONDS}{$ring_bond} = { atom => $atom, $bond ? ( bond => $bond ) : () }; # Record a placeholder for later addition of real chirality # neighbour, which will be identified by the ring bond number _push_chirality_neighbour( $atom, $ring_bond ); } } sub _merge_graphs { my( $self, $index1, $index2 ) = @_; return if $index1 == $index2; my $g1 = $self->{USER}{GRAPHS}[$index1]; my $g2 = $self->{USER}{GRAPHS}[$index2]; for ($g2->vertices) { $_->{graph} = $g1; $_->{index} = $index1; } $g1->add_vertices( $g2->vertices ); for ($g2->edges) { my $attributes = $g2->get_edge_attributes( @$_ ); if( $attributes ) { $g1->set_edge_attributes( @$_, $attributes ); } else { $g1->add_edge( @$_ ); } } $self->{USER}{GRAPHS}[$index2] = undef; } sub _push_chirality_neighbour { my( $atom1, $atom2 ) = @_; return unless is_chiral $atom1; push @{$atom1->{chirality_neighbours}}, $atom2; } sub _unshift_chirality_neighbour { my( $atom1, $atom2 ) = @_; return unless is_chiral $atom1; unshift @{$atom1->{chirality_neighbours}}, $atom2; } 1; Chemistry-OpenSMILES-0.9.0/lib/Chemistry/OpenSMILES/Aromaticity.pm0000644000200400020040000001364614516211770024355 0ustar andriusandriuspackage Chemistry::OpenSMILES::Aromaticity; use strict; use warnings; # ABSTRACT: Aromaticity handling routines our $VERSION = '0.9.0'; # VERSION use Chemistry::OpenSMILES qw( is_aromatic is_double_bond is_single_bond ); use Graph::Traversal::DFS; use List::Util qw( all ); =head1 NAME Chemistry::OpenSMILES::Aromaticity - Aromaticity handling routines =head1 DESCRIPTION Chemistry::OpenSMILES::Aromaticity encodes some aromaticity handling subroutines for aromatisation and kekulisation. Both implementations are experimental, handle only some specific cases and are neither stable nor bug-free, thus should be used with caution. =cut require Exporter; our @ISA = qw( Exporter ); our @EXPORT_OK = qw( aromatise electron_cycles kekulise ); =head1 METHODS =over 4 =item aromatise( $moiety ) Mark electron cycles as aromatic. =cut sub aromatise { my( $moiety ) = @_; my @electron_cycles = electron_cycles( $moiety ); for my $cycle (@electron_cycles) { for my $i (0..$#$cycle) { # Set bond to aromatic $moiety->set_edge_attribute( $cycle->[$i], $cycle->[($i + 1) % scalar @$cycle], 'bond', ':' ); # Set atom to aromatic if( $cycle->[$i]{symbol} =~ /^([BCNOPS]|Se|As)$/ ) { $cycle->[$i]{symbol} = lcfirst $cycle->[$i]{symbol}; } } } } =item kekulise( $moiety ) Find nonfused even-length aromatic cycles consisting only of B, C, N, P, S and mark them with aliterating single and double bonds. =cut sub kekulise { my( $moiety ) = @_; my $aromatic_only = $moiety->copy_graph; $aromatic_only->delete_vertices( grep { !is_aromatic $_ } $aromatic_only->vertices ); my @components; my $get_root = sub { my( $self, $unseen ) = @_; my( $next ) = sort { $unseen->{$a}{number} <=> $unseen->{$b}{number} } keys %$unseen; return unless defined $next; push @components, []; return $unseen->{$next}; }; my $operations = { first_root => $get_root, next_root => $get_root, pre => sub { push @{$components[-1]}, $_[0] }, }; my $traversal = Graph::Traversal::DFS->new( $aromatic_only, %$operations ); $traversal->dfs; for my $component (@components) { # Taking only simple even-length cycles into consideration next unless all { $aromatic_only->degree( $_ ) == 2 } @$component; next unless all { $moiety->degree( $_ ) <= 3 } @$component; next unless all { $_->{symbol} =~ /^[BCNPS]$/i } @$component; next if @$component % 2; my( $first ) = sort { $a->{number} <=> $b->{number} } @$component; my( $second ) = sort { $a->{number} <=> $b->{number} } $aromatic_only->neighbours( $first ); my $n = 0; while( $n < @$component ) { $first->{symbol} = ucfirst $first->{symbol}; if( $n % 2 ) { $moiety->set_edge_attribute( $first, $second, 'bond', '=' ); } else { $moiety->delete_edge_attribute( $first, $second, 'bond' ); } ( $first, $second ) = ( $second, grep { $_ ne $first } $aromatic_only->neighbours( $second ) ); $n++; } } } =item electron_cycles( $moiety ) Find electron cycles according to "Finding Electron Cycles" algorithm from L. Use with caution: the implementation is experimental. =cut sub electron_cycles { my( $moiety ) = @_; my @cycles; for my $start ($moiety->vertices) { my %seen; my %prev; my $operations = { start => sub { return $start }, pre => sub { $seen{$_[0]} = 1 }, pre_edge => sub { my( $u, $v ) = @_; ( $u, $v ) = ( $v, $u ) if $seen{$v}; $prev{$v} = $u; }, non_tree_edge => sub { my( $u, $v ) = @_; if( $u == $start || $v == $start ) { ( $u, $v ) = ( $v, $u ) if $v == $start; my $current = $v; my $prev_bond_is_single; my $cycle_is_alterating = 1; my @cycle = ( $u ); while( $prev{$current} ) { if( ( !defined $prev_bond_is_single && ( is_single_bond( $moiety, $current, $prev{$current} ) || is_double_bond( $moiety, $current, $prev{$current} ) ) ) || ( $prev_bond_is_single && is_double_bond( $moiety, $current, $prev{$current} ) ) || ( !$prev_bond_is_single && is_single_bond( $moiety, $current, $prev{$current} ) ) ) { # Logic is inverted here as $prev_bond_is_single is # inverted after the conditional. $prev_bond_is_single = !is_single_bond( $moiety, $current, $prev{$current} ); push @cycle, $current; $current = $prev{$current}; } else { $cycle_is_alterating = 0; last; } last unless $cycle_is_alterating; $prev_bond_is_single = 1 - $prev_bond_is_single; } push @cycles, \@cycle if $cycle_is_alterating; } }, }; my $traversal = Graph::Traversal::DFS->new( $moiety, %$operations ); $traversal->dfs; } my %unique; for (@cycles) { $unique{join '', sort @$_} = $_; } return values %unique; } =back =cut 1; Chemistry-OpenSMILES-0.9.0/lib/Chemistry/OpenSMILES/Writer.pm0000644000200400020040000003006514516211770023336 0ustar andriusandriuspackage Chemistry::OpenSMILES::Writer; use strict; use warnings; use Chemistry::OpenSMILES qw( is_aromatic is_chiral toggle_cistrans ); use Chemistry::OpenSMILES::Parser; use Graph::Traversal::DFS; use List::Util qw( all any uniq ); # ABSTRACT: OpenSMILES format writer our $VERSION = '0.9.0'; # VERSION require Exporter; our @ISA = qw( Exporter ); our @EXPORT_OK = qw( write_SMILES ); sub write_SMILES { my( $what, $order_sub ) = @_; if( ref $what eq 'HASH' ) { # subroutine will also accept and properly represent a single # atom: return _pre_vertex( $what ); } my @moieties = ref $what eq 'ARRAY' ? @$what : ( $what ); my @components; $order_sub = \&_order unless $order_sub; for my $graph (@moieties) { my @symbols; my %vertex_symbols; my $nrings = 0; my %seen_rings; my @chiral; my %discovered_from; my $rings = {}; my $operations = { tree_edge => sub { my( $seen, $unseen, $self ) = @_; if( $vertex_symbols{$unseen} ) { ( $seen, $unseen ) = ( $unseen, $seen ); } push @symbols, _tree_edge( $seen, $unseen, $self, $order_sub ); $discovered_from{$unseen} = $seen }, non_tree_edge => sub { my @sorted = sort { $vertex_symbols{$a} <=> $vertex_symbols{$b} } @_[0..1]; $rings->{$vertex_symbols{$_[0]}} {$vertex_symbols{$_[1]}} = $rings->{$vertex_symbols{$_[1]}} {$vertex_symbols{$_[0]}} = _depict_bond( @sorted, $graph ) }, pre => sub { my( $vertex, $dfs ) = @_; push @chiral, $vertex if is_chiral $vertex; push @symbols, _pre_vertex( { map { $_ => $vertex->{$_} } grep { $_ ne 'chirality' } keys %$vertex } ); $vertex_symbols{$vertex} = $#symbols }, post => sub { push @symbols, ')' }, next_root => undef, }; if( $order_sub ) { $operations->{first_root} = sub { return $order_sub->( $_[1], $_[0]->graph ) }; $operations->{next_successor} = sub { return $order_sub->( $_[1], $_[0]->graph ) }; } my $traversal = Graph::Traversal::DFS->new( $graph, %$operations ); $traversal->dfs; if( scalar keys %vertex_symbols != scalar $graph->vertices ) { warn scalar( $graph->vertices ) - scalar( keys %vertex_symbols ) . ' unreachable atom(s) detected in moiety' . "\n"; } next unless @symbols; pop @symbols; # Dealing with chirality for my $atom (@chiral) { next unless $atom->{chirality} =~ /^@(@?|SP[123])$/; my @neighbours = $graph->neighbours($atom); if( scalar @neighbours < 3 || scalar @neighbours > 4 ) { # TODO: process also configurations other than tetrahedral warn "chirality '$atom->{chirality}' observed for atom " . 'with ' . scalar @neighbours . ' neighbours, can only ' . 'process tetrahedral chiral centers with possible ' . 'lone pairs' . "\n"; next; } my $chirality_now = $atom->{chirality}; if( $atom->{chirality_neighbours} ) { if( scalar @neighbours != scalar @{$atom->{chirality_neighbours}} ) { warn 'number of neighbours does not match the length ' . "of 'chirality_neighbours' array, cannot process " . 'such chiral centers' . "\n"; next; } my %indices; for (0..$#{$atom->{chirality_neighbours}}) { my $pos = $_; if( scalar @{$atom->{chirality_neighbours}} == 3 && $_ != 0 ) { # Lone pair is always second in the chiral neighbours array $pos++; } $indices{$vertex_symbols{$atom->{chirality_neighbours}[$_]}} = $pos; } my @order_new; # In newly established order, the atom from which this one # is discovered (left hand side) will be the first, if any if( $discovered_from{$atom} ) { push @order_new, $indices{$vertex_symbols{$discovered_from{$atom}}}; } # Second, there will be ring bonds as they are added the # first of all the neighbours if( $rings->{$vertex_symbols{$atom}} ) { push @order_new, map { $indices{$_} } sort { $a <=> $b } keys %{$rings->{$vertex_symbols{$atom}}}; } # Finally, all neighbours are added, uniq will remove duplicates push @order_new, map { $indices{$_} } sort { $a <=> $b } map { $vertex_symbols{$_} } @neighbours; @order_new = uniq @order_new; if( scalar @order_new == 3 ) { # Accommodate the lone pair if( $discovered_from{$atom} ) { @order_new = ( $order_new[0], 1, @order_new[1..2] ); } else { unshift @order_new, 1; } } if( $atom->{chirality} =~ /^@@?$/ ) { # Tetragonal centers if( join( '', _permutation_order( @order_new ) ) ne '0123' ) { $chirality_now = $chirality_now eq '@' ? '@@' : '@'; } } else { # Square planar centers $chirality_now = _square_planar_chirality( @order_new, $chirality_now ); } } my $parser = Chemistry::OpenSMILES::Parser->new; my( $graph_reparsed ) = $parser->parse( $symbols[$vertex_symbols{$atom}], { raw => 1 } ); my( $atom_reparsed ) = $graph_reparsed->vertices; $atom_reparsed->{chirality} = $chirality_now; $symbols[$vertex_symbols{$atom}] = write_SMILES( $atom_reparsed ); } # Adding ring numbers my @ring_ids = ( 1..99, 0 ); my @ring_ends; for my $i (0..$#symbols) { if( $rings->{$i} ) { for my $j (sort { $a <=> $b } keys %{$rings->{$i}}) { next if $i > $j; if( !@ring_ids ) { # All 100 rings are open now. There is no other # solution but to terminate the program. die 'cannot represent more than 100 open ring' . ' bonds'; } $symbols[$i] .= $rings->{$i}{$j} . ($ring_ids[0] < 10 ? '' : '%') . $ring_ids[0]; $symbols[$j] .= ($rings->{$i}{$j} eq '/' ? '\\' : $rings->{$i}{$j} eq '\\' ? '/' : $rings->{$i}{$j}) . ($ring_ids[0] < 10 ? '' : '%') . $ring_ids[0]; push @{$ring_ends[$j]}, shift @ring_ids; } } if( $ring_ends[$i] ) { # Ring bond '0' must stay in the end @ring_ids = sort { ($a == 0) - ($b == 0) || $a <=> $b } (@{$ring_ends[$i]}, @ring_ids); } } push @components, join '', @symbols; } return join '.', @components; } # DEPRECATED sub write { &write_SMILES; } sub _tree_edge { my( $u, $v, $self ) = @_; return '(' . _depict_bond( $u, $v, $self->graph ); } sub _pre_vertex { my( $vertex ) = @_; my $atom = $vertex->{symbol}; my $is_simple = $atom =~ /^[bcnosp]$/i || $atom =~ /^(F|Cl|Br|I|\*)$/; if( exists $vertex->{isotope} ) { $atom = $vertex->{isotope} . $atom; $is_simple = 0; } if( is_chiral $vertex ) { $atom .= $vertex->{chirality}; $is_simple = 0; } if( $vertex->{hcount} ) { # if non-zero $atom .= 'H' . ($vertex->{hcount} == 1 ? '' : $vertex->{hcount}); $is_simple = 0; } if( $vertex->{charge} ) { # if non-zero $atom .= ($vertex->{charge} > 0 ? '+' : '') . $vertex->{charge}; $atom =~ s/([-+])1$/$1/; $is_simple = 0; } if( $vertex->{class} ) { # if non-zero $atom .= ':' . $vertex->{class}; $is_simple = 0; } return $is_simple ? $atom : "[$atom]"; } sub _depict_bond { my( $u, $v, $graph ) = @_; if( !$graph->has_edge_attribute( $u, $v, 'bond' ) ) { return is_aromatic $u && is_aromatic $v ? '-' : ''; } my $bond = $graph->get_edge_attribute( $u, $v, 'bond' ); return $bond if $bond ne '/' && $bond ne '\\'; return $bond if $u->{number} < $v->{number}; return toggle_cistrans $bond; } # Reorder a permutation of elements 0, 1, 2 and 3 by taking an element # and moving it two places either forward or backward in the line. This # subroutine is used to check whether a sign change of tetragonal # chirality is required or not. sub _permutation_order { # Safeguard against endless cycles due to undefined values if( (scalar @_ != 4) || (any { !defined || !/^[0-3]$/ } @_) || (join( ',', sort @_ ) ne '0,1,2,3') ) { warn '_permutation_order() accepts only permutations of numbers ' . "'0', '1', '2' and '3', unexpected input received"; return 0..3; # Return original order } while( $_[2] == 0 || $_[3] == 0 ) { @_ = ( $_[0], @_[2..3], $_[1] ); } if( $_[0] != 0 ) { @_ = ( @_[1..2], $_[0], $_[3] ); } while( $_[1] != 1 ) { @_[1..3] = ( @_[2..3], $_[1] ); } return @_; } sub _square_planar_chirality { my $chirality = pop @_; my @source = 0..3; my @target = @_; if( join( ',', sort @_ ) ne '0,1,2,3' ) { die '_square_planar_chirality() accepts only permutations of ' . "numbers '0', '1', '2' and '3', unexpected input received"; } # Rotations until 0 is first while( $source[0] != $target[0] ) { push @source, shift @source; my %tab = ( '@SP1' => '@SP1', '@SP2' => '@SP3', '@SP3' => '@SP2' ); $chirality = $tab{$chirality}; } if( $source[3] == $target[1] ) { # Swap the right side ( $source[2], $source[3] ) = ( $source[3], $source[2] ); my %tab = ( '@SP1' => '@SP3', '@SP2' => '@SP2', '@SP3' => '@SP1' ); $chirality = $tab{$chirality}; } if( $source[2] == $target[1] ) { # Swap the center ( $source[1], $source[2] ) = ( $source[2], $source[1] ); my %tab = ( '@SP1' => '@SP2', '@SP2' => '@SP1', '@SP3' => '@SP3' ); $chirality = $tab{$chirality}; } if( $source[3] == $target[2] ) { # Swap the right side ( $source[2], $source[3] ) = ( $source[3], $source[2] ); my %tab = ( '@SP1' => '@SP3', '@SP2' => '@SP2', '@SP3' => '@SP1' ); $chirality = $tab{$chirality}; } return $chirality; } sub _order { my( $vertices ) = @_; my @sorted = sort { $vertices->{$a}{number} <=> $vertices->{$b}{number} } keys %$vertices; return $vertices->{shift @sorted}; } 1; Chemistry-OpenSMILES-0.9.0/lib/Chemistry/OpenSMILES/Stereo.pm0000644000200400020040000003433414516211770023326 0ustar andriusandriuspackage Chemistry::OpenSMILES::Stereo; use strict; use warnings; # ABSTRACT: Stereochemistry handling routines our $VERSION = '0.9.0'; # VERSION require Exporter; our @ISA = qw( Exporter ); our @EXPORT_OK = qw( chirality_to_pseudograph cis_trans_to_pseudoedges mark_all_double_bonds mark_cis_trans ); use Chemistry::OpenSMILES qw( is_cis_trans_bond is_double_bond is_ring_bond is_single_bond toggle_cistrans ); use Chemistry::OpenSMILES::Writer qw( write_SMILES ); use Graph::Traversal::BFS; use Graph::Undirected; use List::Util qw( all any max min sum sum0 uniq ); sub mark_all_double_bonds { my( $graph, $setting_sub, $order_sub, $color_sub ) = @_; # By default, whenever there is a choice between atoms, the one with # lowest position in the input SMILES is chosen: $order_sub = sub { return $_[0]->{number} } unless $order_sub; # Select non-ring double bonds my @double_bonds = grep { is_double_bond( $graph, @$_ ) && !is_ring_bond( $graph, @$_ ) && !is_unimportant_double_bond( $graph, @$_, $color_sub ) } $graph->edges; # Construct a double bond incidence graph. Vertices are double bonds # and edges are between those double bonds that separated by a single # single ('-') bond. Interestingly, incidence graph for SMILES C=C(C)=C # is connected, but for C=C=C not. This is because allenal systems # cannot be represented yet. my $bond_graph = Graph::Undirected->new; my %incident_double_bonds; for my $bond (@double_bonds) { $bond_graph->add_vertex( join '', sort @$bond ); push @{$incident_double_bonds{$bond->[0]}}, $bond; push @{$incident_double_bonds{$bond->[1]}}, $bond; } for my $bond ($graph->edges) { next unless is_single_bond( $graph, @$bond ); my @adjacent_bonds; if( $incident_double_bonds{$bond->[0]} ) { push @adjacent_bonds, @{$incident_double_bonds{$bond->[0]}}; } if( $incident_double_bonds{$bond->[1]} ) { push @adjacent_bonds, @{$incident_double_bonds{$bond->[1]}}; } for my $bond1 (@adjacent_bonds) { for my $bond2 (@adjacent_bonds) { next if $bond1 == $bond2; $bond_graph->add_edge( join( '', sort @$bond1 ), join( '', sort @$bond2 ) ); } } } # In principle, bond graph could be splitted into separate components # to reduce the number of cycles needed by Morgan algorithm, but I do # not think there is a failure case because of keeping them together. # Set up initial invariants my %invariants; for ($bond_graph->vertices) { $invariants{$_} = $bond_graph->degree( $_ ); } my %distinct_invariants = map { $_ => 1 } values %invariants; # Perform Morgan algorithm while( 1 ) { my %invariants_now; for ($bond_graph->vertices) { $invariants_now{$_} = sum0 map { $invariants{$_} } $bond_graph->neighbours( $_ ); } my %distinct_invariants_now = map { $_ => 1 } values %invariants_now; last if %distinct_invariants_now <= %distinct_invariants; %invariants = %invariants_now; %distinct_invariants = %distinct_invariants_now; } # Establish a deterministic order favouring bonds with higher invariants. # If invariants are equal, order bonds by their atom numbers. @double_bonds = sort { $invariants{join '', sort @$b} <=> $invariants{join '', sort @$a} || (min map { $order_sub->($_) } @$a) <=> (min map { $order_sub->($_) } @$b) || (max map { $order_sub->($_) } @$a) <=> (max map { $order_sub->($_) } @$b) } @double_bonds; for (@double_bonds) { mark_cis_trans( $graph, @$_, $setting_sub, $order_sub ); } } # Requires double bonds in input. Does not check whether a bond belongs # to a ring or not. sub mark_cis_trans { my( $graph, $atom2, $atom3, $setting_sub, $order_sub ) = @_; # By default, whenever there is a choice between atoms, the one with # lowest position in the input SMILES is chosen: $order_sub = sub { return $_[0]->{number} } unless $order_sub; my @neighbours2 = $graph->neighbours( $atom2 ); my @neighbours3 = $graph->neighbours( $atom3 ); return if @neighbours2 < 2 || @neighbours3 < 2; # TODO: Currently we are choosing either a pair of # neighbouring atoms which have no cis/trans markers or # a pair of which a single atom has a cis/trans marker. # The latter case allows to accommodate adjacent double # bonds. However, there may be a situation where both # atoms already have cis/trans markers, but could still # be reconciled. my @cistrans_bonds2 = grep { is_cis_trans_bond( $graph, $atom2, $_ ) } @neighbours2; my @cistrans_bonds3 = grep { is_cis_trans_bond( $graph, $atom3, $_ ) } @neighbours3; if( @cistrans_bonds2 + @cistrans_bonds3 > 1 ) { warn 'cannot represent cis/trans bond between atoms ' . join( ' and ', sort map { $_->{number} } $atom2, $atom3 ) . ' as there are other cis/trans bonds nearby' . "\n"; return; } if( (@neighbours2 == 2 && !@cistrans_bonds2 && !any { is_single_bond( $graph, $atom2, $_ ) } @neighbours2) || (@neighbours3 == 2 && !@cistrans_bonds3 && !any { is_single_bond( $graph, $atom3, $_ ) } @neighbours3) ) { # Azide group (N=N#N) or conjugated allene-like systems (=C=) warn 'atoms ' . join( ' and ', sort map { $_->{number} } $atom2, $atom3 ) . ' are part of conjugated double/triple bond system, thus ' . 'cis/trans setting of their bond is impossible to represent ' . '(not supported yet)' . "\n"; return; } # Making the $atom2 be the one which has a defined cis/trans bond. # Also, a deterministic ordering of atoms in bond is achieved here. if( @cistrans_bonds3 || (!@cistrans_bonds2 && $order_sub->($atom2) > $order_sub->($atom3)) ) { ( $atom2, $atom3 ) = ( $atom3, $atom2 ); @neighbours2 = $graph->neighbours( $atom2 ); @neighbours3 = $graph->neighbours( $atom3 ); @cistrans_bonds2 = @cistrans_bonds3; @cistrans_bonds3 = (); } # Establishing the canonical order @neighbours2 = sort { $order_sub->($a) <=> $order_sub->($b) } grep { is_single_bond( $graph, $atom2, $_ ) } @neighbours2; @neighbours3 = sort { $order_sub->($a) <=> $order_sub->($b) } grep { is_single_bond( $graph, $atom3, $_ ) } @neighbours3; # Check if there is a chance to have anything marked my $bond_will_be_marked; for my $atom1 (@cistrans_bonds2, @neighbours2) { for my $atom4 (@neighbours3) { my $setting = $setting_sub->( $atom1, $atom2, $atom3, $atom4 ); if( $setting ) { $bond_will_be_marked = 1; last; } } } if( !$bond_will_be_marked ) { warn 'cannot represent cis/trans bond between atoms ' . join( ' and ', sort map { $_->{number} } $atom2, $atom3 ) . ' as there are no eligible single bonds nearby' . "\n"; return; } # If there is an atom with cis/trans bond, then this is this one my( $first_atom ) = @cistrans_bonds2 ? @cistrans_bonds2 : @neighbours2; if( !@cistrans_bonds2 ) { $graph->set_edge_attribute( $first_atom, $atom2, 'bond', '/' ); } my $atom4_marked; for my $atom4 (@neighbours3) { my $atom1 = $first_atom; my $setting = $setting_sub->( $atom1, $atom2, $atom3, $atom4 ); next unless $setting; my $other = $graph->get_edge_attribute( $atom1, $atom2, 'bond' ); $other = toggle_cistrans $other if $setting eq 'cis'; $other = toggle_cistrans $other if $atom1->{number} > $atom2->{number}; $other = toggle_cistrans $other if $atom4->{number} < $atom3->{number}; $graph->set_edge_attribute( $atom3, $atom4, 'bond', $other ); $atom4_marked = $atom4 unless $atom4_marked; } for my $atom1 (@neighbours2) { next if $atom1 eq $first_atom; # Marked already my $atom4 = $atom4_marked; my $setting = $setting_sub->( $atom1, $atom2, $atom3, $atom4 ); next unless $setting; my $other = $graph->get_edge_attribute( $atom3, $atom4, 'bond' ); $other = toggle_cistrans $other if $setting eq 'cis'; $other = toggle_cistrans $other if $atom1->{number} > $atom2->{number}; $other = toggle_cistrans $other if $atom4->{number} < $atom3->{number}; $graph->set_edge_attribute( $atom1, $atom2, 'bond', $other ); } } # Store the tetrahedral chirality character as additional pseudo vertices # and edges. sub chirality_to_pseudograph { my( $moiety ) = @_; for my $atom ($moiety->vertices) { next unless Chemistry::OpenSMILES::is_chiral_tetrahedral( $atom ); next unless @{$atom->{chirality_neighbours}} >= 3 && @{$atom->{chirality_neighbours}} <= 4; my @chirality_neighbours = @{$atom->{chirality_neighbours}}; if( @chirality_neighbours == 3 ) { @chirality_neighbours = ( $chirality_neighbours[0], {}, # marking the lone pair @chirality_neighbours[1..2] ); } if( $atom->{chirality} eq '@' ) { # Reverse the order if counter-clockwise @chirality_neighbours = ( $chirality_neighbours[0], reverse @chirality_neighbours[1..3] ); } for my $i (0..3) { my $neighbour = $chirality_neighbours[$i]; my @chirality_neighbours_now = @chirality_neighbours; if( $i % 2 ) { # Reverse the order due to projected atom change @chirality_neighbours_now = ( $chirality_neighbours_now[0], reverse @chirality_neighbours_now[1..3] ); } my @other = grep { $_ != $neighbour } @chirality_neighbours_now; for my $offset (0..2) { my $connector = {}; $moiety->set_edge_attribute( $neighbour, $connector, 'chiral', 'from' ); $moiety->set_edge_attribute( $atom, $connector, 'chiral', 'to' ); $moiety->set_edge_attribute( $connector, $other[0], 'chiral', 1 ); $moiety->set_edge_attribute( $connector, $other[1], 'chiral', 2 ); $moiety->set_edge_attribute( $connector, $other[2], 'chiral', 3 ); push @other, shift @other; } } } } sub cis_trans_to_pseudoedges { my( $moiety ) = @_; # Select non-ring double bonds my @double_bonds = grep { is_double_bond( $moiety, @$_ ) && !is_ring_bond( $moiety, @$_ ) && !is_unimportant_double_bond( $moiety, @$_ ) } $moiety->edges; # Connect cis/trans atoms in double bonds with pseudo-edges for my $bond (@double_bonds) { my( $atom2, $atom3 ) = @$bond; my @atom2_neighbours = grep { !is_pseudoedge( $moiety, $atom2, $_ ) } $moiety->neighbours( $atom2 ); my @atom3_neighbours = grep { !is_pseudoedge( $moiety, $atom3, $_ ) } $moiety->neighbours( $atom3 ); next if @atom2_neighbours < 2 || @atom2_neighbours > 3 || @atom3_neighbours < 2 || @atom3_neighbours > 3; my( $atom1 ) = grep { is_cis_trans_bond( $moiety, $atom2, $_ ) } @atom2_neighbours; my( $atom4 ) = grep { is_cis_trans_bond( $moiety, $atom3, $_ ) } @atom3_neighbours; next unless $atom1 && $atom4; my( $atom1_para ) = grep { $_ != $atom1 && $_ != $atom3 } @atom2_neighbours; my( $atom4_para ) = grep { $_ != $atom4 && $_ != $atom2 } @atom3_neighbours; my $is_cis = $moiety->get_edge_attribute( $atom1, $atom2, 'bond' ) ne $moiety->get_edge_attribute( $atom3, $atom4, 'bond' ); $is_cis = !$is_cis if $atom1->{number} > $atom2->{number}; $is_cis = !$is_cis if $atom3->{number} > $atom4->{number}; $moiety->set_edge_attribute( $atom1, $atom4, 'pseudo', $is_cis ? 'cis' : 'trans' ); if( $atom1_para ) { $moiety->set_edge_attribute( $atom1_para, $atom4, 'pseudo', $is_cis ? 'trans' : 'cis' ); } if( $atom4_para ) { $moiety->set_edge_attribute( $atom1, $atom4_para, 'pseudo', $is_cis ? 'trans' : 'cis' ); } if( $atom1_para && $atom4_para ) { $moiety->set_edge_attribute( $atom1_para, $atom4_para, 'pseudo', $is_cis ? 'cis' : 'trans' ); } } # Unset cis/trans bond markers during second pass for my $bond ($moiety->edges) { next unless is_cis_trans_bond( $moiety, @$bond ); $moiety->delete_edge_attribute( @$bond, 'bond' ); } } sub is_pseudoedge { my( $moiety, $a, $b ) = @_; return $moiety->has_edge_attribute( $a, $b, 'pseudo' ); } # An "unimportant" double bond is one which has chemically identical atoms on one of its sides. # If C<$color_sub> is given, it is used to determine chemical identity of atoms. # If not, only leaf atoms are considered and compared. sub is_unimportant_double_bond { my( $moiety, $a, $b, $color_sub ) = @_; my @a_neighbours = grep { $_ != $b } $moiety->neighbours( $a ); my @b_neighbours = grep { $_ != $a } $moiety->neighbours( $b ); for (\@a_neighbours, \@b_neighbours) { next unless @$_ == 2; my @representations; if( $color_sub ) { @representations = map { $color_sub->( $_ ) } @$_; } else { next if any { $moiety->degree( $_ ) != 1 } @$_; @representations = map { write_SMILES( $_ ) } @$_; } return 1 if uniq( @representations ) == 1; } return; } 1; Chemistry-OpenSMILES-0.9.0/META.json0000644000200400020040000000306014516211770016605 0ustar andriusandrius{ "abstract" : "OpenSMILES format reader and writer", "author" : [ "Andrius Merkys " ], "dynamic_config" : 0, "generated_by" : "Dist::Zilla version 6.012, CPAN::Meta::Converter version 2.150010", "license" : [ "bsd" ], "meta-spec" : { "url" : "http://search.cpan.org/perldoc?CPAN::Meta::Spec", "version" : 2 }, "name" : "Chemistry-OpenSMILES", "prereqs" : { "build" : { "requires" : { "Module::Build" : "0.28", "Module::Build::Parse::Yapp" : "v0.1.2" } }, "configure" : { "requires" : { "Module::Build" : "0.28", "Module::Build::Parse::Yapp" : "v0.1.2" } }, "runtime" : { "requires" : { "Graph" : "0.97", "List::Util" : "1.45", "Parse::Yapp" : "0", "perl" : "5.010000" } }, "test" : { "requires" : { "Test::More" : "0" } } }, "release_status" : "stable", "resources" : { "bugtracker" : { "web" : "https://github.com/merkys/chemistry-opensmiles/issues" }, "homepage" : "https://search.cpan.org/dist/Chemistry-OpenSMILES", "repository" : { "type" : "git", "url" : "git://github.com/merkys/chemistry-opensmiles.git", "web" : "https://github.com/merkys/chemistry-opensmiles" } }, "version" : "0.9.0", "x_generated_by_perl" : "v5.30.0", "x_serialization_backend" : "Cpanel::JSON::XS version 4.19" }