Chemistry-OpenSMILES-0.7.0/0000775000200400020040000000000014137221046015161 5ustar andriusandriusChemistry-OpenSMILES-0.7.0/dist.ini0000644000200400020040000000134114137221046016622 0ustar andriusandriusname = Chemistry-OpenSMILES author = Andrius Merkys license = BSD copyright_holder = Andrius Merkys copyright_year = 2020-2021 version = 0.7.0 [@Filter] -bundle = @Basic -remove = MakeMaker -remove = License [AutoMetaResources] homepage = https://search.cpan.org/dist/%{dist} repository.github = user:merkys bugtracker.github = user:merkys [MetaJSON] [MinimumPerlFast] [ModuleBuild] mb_class = Module::Build::Parse::Yapp build_element = yp [OurPkgVersion] [Prereqs] Graph = 0.97 List::Util = 1.45 Parse::Yapp = 0 [Prereqs / Build] -phase = build Module::Build::Parse::Yapp = 0.1.2 [Prereqs / Configure] -phase = configure Module::Build::Parse::Yapp = 0.1.2 [Prereqs / Test] -phase = test Test::More = 0 Chemistry-OpenSMILES-0.7.0/LICENSE0000644000200400020040000000273314137221046016171 0ustar andriusandriusCopyright (c) The Regents of the University of California. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the University nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. Chemistry-OpenSMILES-0.7.0/Changes0000644000200400020040000000757614137221046016471 0ustar andriusandrius0.7.0 2021-10-30 - Fixing important issue with representing tetrahedral chiral centers by introducing 'chirality_neighbours' key to atom hashes. 0.6.0 2021-10-18 - Allowing multi-digit hydrogen counts if enabled via parser option. - Standardizing chiral markers for allenal carbons. - Not standardizing chiral markers in raw output. - Reporting '@' and '@@' chiralities with other than 4 neighbours as they cannot be processed right now. 0.5.2 2021-10-11 - Creating explicitly refvertexed Graph objects to get around all the problems caused by interpreting them as scalar values. 0.5.1 2021-08-29 - Working around an incompatibility with Graph v0.9721. 0.5.0 2021-08-26 - Handling raw parsing output in Chemistry::OpenSMILES::clean_chiral_centers(). - Fixing a bug in position counting due to $1 pollution. - Detecting and reporting disconnected parts of moieties. - Introducing Chemistry::OpenSMILES::is_chiral() to detect both chiral atoms and moieties. - Fixing bug in Chemistry::OpenSMILES::clean_chiral_centers() which caused removal of all chiral centers, not just tetrahedral. - Introducing Chemistry::OpenSMILES::mirror() to invert chiral centers, currently only tetrahedral. - Fixing bug in printing of chiral centers, previously only tetrahedral centers were printed. 0.4.6 2021-04-14 - Fixing reading/writing of cis/trans ring bonds. 0.4.5 2021-03-25 - Fixing a bug in aromatic bond detection. 0.4.4 2021-03-09 - Introducing Chemistry::OpenSMILES::clean_chiral_centers() to remove unnecessary tetrahedral chiral settings. 0.4.3 2021-01-25 - Supporting tetrahedral chirality in Chemistry::OpenSMILES::Writer::write_SMILES(). - Chiralities @TH1 and @TH2 are the same as @ and @@, respectively. - Specifying the requirement of Graph v0.97 or later. - Fixing issue with homepage. 0.4.2 2021-01-20 - Exporting Chemistry::OpenSMILES::Writer::write_SMILES(). - Removing chirality information from written SMILES, as code to write this reliably is not yet available. - Fixing writing of cis/trans bonds. 0.4.1 2021-01-12 - Fixing an issue with 'scalar %hash' in t/01_atoms.t. - Listing a dependency on Graph::Traversal::DFS. 0.4.0 2021-01-11 - Adding Chemistry::OpenSMILES::Writer, implementing a SMILES writer for the data structures built by Chemistry::OpenSMILES::Parser. - Converting 'charge' atom field to integer. - Exporting Chemistry::OpenSMILES::is_aromatic(). 0.3.2 2020-10-05 - Explicitly setting bond order to ':' on bonds between aromatic atoms. 0.3.1 2020-09-17 - Establishing deterministic order while generating graph vertices for implicit hydrogen atoms. 0.3.0 2020-09-16 - Deriving counts of implicit hydrogen atoms. - Default number of attached hydrogen atoms for atoms in square brackets is 0. - Supporting bracketless '*' atom. 0.2.1 2020-08-13 - Adding 'number' atom field to store atom positions in the initial SMILES string. - Making 'class' atom field mandatory. - Making 'isotope' atom field an integer. 0.2.0 2020-05-20 - Fixing an issue with graph merging. - Unifying the representation of single bonds. - Extending POD documentation. 0.1.3 2020-05-19 - Detecting and reporting unbalanced parentheses. - Detecting and reporting unclosed ring bonds. - Fixing position pointers in error messages, removing line numbers. - Adding versioned dependency on Module::Build::Parse::Yapp v0.1.2. - Declaring minimum Perl version. - Fixing the POD. 0.1.2 2020-05-17 - Turning hydrogen counts into real vertices. If 'raw' option is present, hydrogen counts are left as integers instead. 0.1.1 2020-05-14 - Adding missing dependency on Graph::Undirected. - Adding POD. 0.1.0 2020-05-13 - Initial release. Chemistry-OpenSMILES-0.7.0/README0000644000200400020040000000050514137221046016037 0ustar andriusandrius This archive contains the distribution Chemistry-OpenSMILES, version 0.7.0: OpenSMILES format reader and writer This software is Copyright (c) 2020-2021 by Andrius Merkys. This is free software, licensed under: The (three-clause) BSD License This README file was generated by Dist::Zilla::Plugin::Readme v6.010. Chemistry-OpenSMILES-0.7.0/Build.PL0000644000200400020040000000246114137221046016456 0ustar andriusandrius # This file was automatically generated by Dist::Zilla::Plugin::ModuleBuild v6.010. use strict; use warnings; use Module::Build 0.28; use lib qw{inc}; use Module::Build::Parse::Yapp; my %module_build_args = ( "build_requires" => { "Module::Build" => "0.28", "Module::Build::Parse::Yapp" => "v0.1.2" }, "configure_requires" => { "Module::Build" => "0.28", "Module::Build::Parse::Yapp" => "v0.1.2" }, "dist_abstract" => "OpenSMILES format reader and writer", "dist_author" => [ "Andrius Merkys " ], "dist_name" => "Chemistry-OpenSMILES", "dist_version" => "0.7.0", "license" => "bsd", "module_name" => "Chemistry::OpenSMILES", "recursive_test_files" => 1, "requires" => { "Graph" => "0.97", "List::Util" => "1.45", "Parse::Yapp" => 0, "perl" => "5.010000" }, "test_requires" => { "Test::More" => 0 } ); my %fallback_build_requires = ( "Module::Build" => "0.28", "Module::Build::Parse::Yapp" => "v0.1.2", "Test::More" => 0 ); unless ( eval { Module::Build->VERSION(0.4004) } ) { delete $module_build_args{test_requires}; $module_build_args{build_requires} = \%fallback_build_requires; } my $build = Module::Build::Parse::Yapp->new(%module_build_args); $build->add_build_element($_) for qw(yp); $build->create_build_script; Chemistry-OpenSMILES-0.7.0/MANIFEST0000644000200400020040000000116314137221046016311 0ustar andriusandrius# This file was automatically generated by Dist::Zilla::Plugin::Manifest v6.010. Build.PL Changes LICENSE MANIFEST META.json META.yml README dist.ini lib/Chemistry/OpenSMILES.pm lib/Chemistry/OpenSMILES/Parser.yp lib/Chemistry/OpenSMILES/Writer.pm t/01_atoms.t t/02_chains.t t/03_hcount.t t/04_errors.t t/05_orders.t t/06_write.t t/07_cistrans.t t/08_permutation_order.t t/09_tetrahedral_chirality.t t/10_validate.t t/11_validate_color.t t/12_tetrahedral_chirality.t t/13_clean_chiral_centers.t t/14_write_disconnected.t t/15_chirality.t t/16_unsupported_chirality.t t/17_max_hydrogen_count_digits.t t/18_chirality_reference.t Chemistry-OpenSMILES-0.7.0/META.yml0000644000200400020040000000153514137221046016434 0ustar andriusandrius--- abstract: 'OpenSMILES format reader and writer' author: - 'Andrius Merkys ' build_requires: Module::Build: '0.28' Module::Build::Parse::Yapp: v0.1.2 Test::More: '0' configure_requires: Module::Build: '0.28' Module::Build::Parse::Yapp: v0.1.2 dynamic_config: 0 generated_by: 'Dist::Zilla version 6.010, CPAN::Meta::Converter version 2.150010' license: bsd meta-spec: url: http://module-build.sourceforge.net/META-spec-v1.4.html version: '1.4' name: Chemistry-OpenSMILES requires: Graph: '0.97' List::Util: '1.45' Parse::Yapp: '0' perl: '5.010000' resources: bugtracker: https://github.com/merkys/chemistry-opensmiles/issues homepage: https://search.cpan.org/dist/Chemistry-OpenSMILES repository: git://github.com/merkys/chemistry-opensmiles.git version: 0.7.0 x_serialization_backend: 'YAML::Tiny version 1.70' Chemistry-OpenSMILES-0.7.0/t/0000775000200400020040000000000014137221046015424 5ustar andriusandriusChemistry-OpenSMILES-0.7.0/t/08_permutation_order.t0000644000200400020040000000100614137221046021655 0ustar andriusandrius#!/usr/bin/perl use strict; use warnings; use Chemistry::OpenSMILES::Writer; use Test::More; my $cases = 20; plan tests => $cases; for (1..$cases) { my @order = 0..3; for (0..9) { if( rand() < 0.5 ) { @order = ( @order[1..2], $order[0], $order[3] ); } else { @order = ( $order[0], @order[2..3], $order[1] ); } } # is( join( '', @order ), '0123' ); is( join( '', Chemistry::OpenSMILES::Writer::_permutation_order( @order ) ), '0123' ); } Chemistry-OpenSMILES-0.7.0/t/15_chirality.t0000644000200400020040000000163114137221046020105 0ustar andriusandrius#!/usr/bin/perl use strict; use warnings; use Chemistry::OpenSMILES qw(is_chiral mirror); use Chemistry::OpenSMILES::Parser; use Chemistry::OpenSMILES::Writer qw(write_SMILES); use Test::More; my @cases = ( [ 'N[C@](Br)(O)C', 1, 1, 'N([C@@](Br)(O)(C))' ], [ 'N[C@@](Br)(O)C', 1, 1, 'N([C@](Br)(O)(C))' ], [ 'N(C)(Br)(O)C', 0, 0, 'N(C)(Br)(O)(C)' ], [ 'N(C)(Br)(O)C', 0, 0, 'N(C)(Br)(O)(C)' ], [ 'N[C@AL1](Br)(O)C', 1, 0, 'N(C(Br)(O)(C))' ], ); plan tests => 3 * scalar @cases; for my $case (@cases) { my $parser; my $moiety; my $result; $parser = Chemistry::OpenSMILES::Parser->new; ( $moiety ) = $parser->parse( $case->[0], { raw => 1 } ); is( is_chiral( $moiety ) + 0, $case->[1] ); is( Chemistry::OpenSMILES::is_chiral_tetrahedral( $moiety ) + 0, $case->[2] ); mirror( $moiety ); is( write_SMILES( $moiety ), $case->[3] ); } Chemistry-OpenSMILES-0.7.0/t/09_tetrahedral_chirality.t0000644000200400020040000000277514137221046022501 0ustar andriusandrius#!/usr/bin/perl use strict; use warnings; use Chemistry::OpenSMILES::Parser; use Chemistry::OpenSMILES::Writer qw(write_SMILES); use Test::More; my @cases = ( [ 'N[C@](Br)(O)C', 'N([C@](Br)(O)(C))', 'C([C@](O)(Br)(N))' ], [ 'Br[C@](O)(N)C', 'Br([C@](O)(N)(C))', 'C([C@](N)(O)(Br))' ], [ 'O[C@](Br)(C)N', 'O([C@](Br)(C)(N))', 'N([C@](C)(Br)(O))' ], [ 'Br[C@](C)(O)N', 'Br([C@](C)(O)(N))', 'N([C@](O)(C)(Br))' ], [ 'C[C@](Br)(N)O', 'C([C@](Br)(N)(O))', 'O([C@](N)(Br)(C))' ], [ 'Br[C@](N)(C)O', 'Br([C@](N)(C)(O))', 'O([C@](C)(N)(Br))' ], [ 'C[C@@](Br)(O)N', 'C([C@@](Br)(O)(N))', 'N([C@@](O)(Br)(C))' ], [ 'Br[C@@](N)(O)C', 'Br([C@@](N)(O)(C))', 'C([C@@](O)(N)(Br))' ], [ '[C@@](C)(Br)(O)N', '[C@@](C)(Br)(O)(N)', 'N([C@@](O)(Br)(C))' ], [ '[C@@](Br)(N)(O)C', '[C@@](Br)(N)(O)(C)', 'C([C@@](O)(N)(Br))' ], [ 'C1OCC[C@]1(Cl)Br', 'C1(O(C(C([C@]1(Cl)(Br)))))', 'Br([C@@]1(Cl)(C(C(O(C1)))))' ], ); plan tests => 2 * scalar @cases; for my $case (@cases) { my $parser; my @moieties; my $result; $parser = Chemistry::OpenSMILES::Parser->new; @moieties = $parser->parse( $case->[0], { raw => 1 } ); $result = write_SMILES( \@moieties ); is( $result, $case->[1] ); $result = write_SMILES( \@moieties, \&reverse_order ); is( $result, $case->[2] ); } sub reverse_order { my( $vertices ) = @_; my @sorted = sort { $vertices->{$b}{number} <=> $vertices->{$a}{number} } keys %$vertices; return $vertices->{shift @sorted}; } Chemistry-OpenSMILES-0.7.0/t/17_max_hydrogen_count_digits.t0000644000200400020040000000056214137221046023360 0ustar andriusandrius#!/usr/bin/perl use strict; use warnings; use Chemistry::OpenSMILES::Parser; use Test::More; my %cases = ( '[YbH12+3]' => 13, ); plan tests => scalar keys %cases; for (sort keys %cases) { my $parser = Chemistry::OpenSMILES::Parser->new; my( $graph ) = $parser->parse( $_, { max_hydrogen_count_digits => 2 } ); is( $graph->vertices, $cases{$_} ); } Chemistry-OpenSMILES-0.7.0/t/05_orders.t0000644000200400020040000000215014137221046017407 0ustar andriusandrius#!/usr/bin/perl use strict; use warnings; use Chemistry::OpenSMILES::Parser; use Test::More; my %cases = ( 'CCC' => {}, 'C-C' => {}, 'C=C' => { '=' => 1 }, 'C-1C1' => {}, 'C=1C1' => { '=' => 1 }, 'C(=O)' => { '=' => 1 }, 'C(C=C)' => { '=' => 1 }, 'cc' => { ':' => 1 }, 'c:c' => { ':' => 1 }, 'c(c)' => { ':' => 1 }, 'c1ccc1' => { ':' => 4 }, 'c-c' => {}, 'c(-c)' => {}, 'c1cc-1' => { ':' => 2 }, 'c-1cc1' => { ':' => 2 }, ); plan tests => 2 * scalar keys %cases; for my $case (sort keys %cases) { my $parser = Chemistry::OpenSMILES::Parser->new; my @graphs = $parser->parse( $case ); is( scalar @graphs, 1 ); my $graph = shift @graphs; my %orders; for ($graph->edges) { next if !$graph->has_edge_attribute( @$_, 'bond' ); $orders{ $graph->get_edge_attribute( @$_, 'bond' )} ++; } is( serialize( \%orders ), serialize( $cases{$case} ) ); } sub serialize { my( $orders ) = @_; return join ' ', map { $_ . '(' . $orders->{$_} . ')' } sort keys %$orders; } Chemistry-OpenSMILES-0.7.0/t/13_clean_chiral_centers.t0000644000200400020040000000113414137221046022240 0ustar andriusandrius#!/usr/bin/perl use strict; use warnings; use Chemistry::OpenSMILES qw(clean_chiral_centers); use Chemistry::OpenSMILES::Parser; use Chemistry::OpenSMILES::Writer qw(write_SMILES); use Test::More; my %cases = ( 'C[C@H2]O' => 1, 'C[C@H]([H])O' => 1, 'C[C@H]([S])O' => 0, ); plan tests => 2 * scalar keys %cases; for my $case (sort keys %cases) { my $parser = Chemistry::OpenSMILES::Parser->new; my @moieties = $parser->parse( $case, { raw => 1 } ); is( scalar @moieties, 1 ); is( scalar clean_chiral_centers( $moieties[0], \&write_SMILES ), $cases{$case} ); } Chemistry-OpenSMILES-0.7.0/t/04_errors.t0000644000200400020040000000254314137221046017432 0ustar andriusandrius#!/usr/bin/perl use strict; use warnings; use Chemistry::OpenSMILES::Parser; use Test::More; my %cases = ( 'C(CC' => 't/04_errors.t: syntax error: missing closing parenthesis.', 'CC)C' => 't/04_errors.t: syntax error: unbalanced parentheses.', 'CCC)' => 't/04_errors.t: syntax error: unbalanced parentheses.', 'C#=O' => 't/04_errors.t: syntax error at position 3: \'O\'.', 'C..O' => 't/04_errors.t: syntax error at position 3: \'O\'.', 'CCC1' => 't/04_errors.t: unclosed ring bond(s) detected: 1.', 'C2%12' => 't/04_errors.t: unclosed ring bond(s) detected: 2, 12.', 'C=1CC$1' => 't/04_errors.t: ring bond types for ring bond 1 do not match.', 'C((C))O' => 't/04_errors.t: syntax error at position 3: \'C))O\'.', '(O)' => 't/04_errors.t: syntax error at position 1: \'O)\'.', '(O)C' => 't/04_errors.t: syntax error at position 1: \'O)C\'.', '.CC' => 't/04_errors.t: syntax error at position 1: \'CC\'.', 'CC.' => 't/04_errors.t: syntax error at position 4.', ); plan tests => scalar keys %cases; for (sort keys %cases) { 'intentionally polluting $1' =~ /(\S+)/; my $error; eval { my $parser = Chemistry::OpenSMILES::Parser->new; my @graphs = $parser->parse( $_ ); }; $error = $@ if $@; $error =~ s/\n$// if $error; is( $error, $cases{$_} ); } Chemistry-OpenSMILES-0.7.0/t/14_write_disconnected.t0000644000200400020040000000074714137221046021777 0ustar andriusandrius#!/usr/bin/perl use strict; use warnings; use Chemistry::OpenSMILES::Writer qw(write_SMILES); use Graph::Undirected; use Test::More; plan tests => 2; my $graph = Graph::Undirected->new( refvertexed => 1 ); $graph->add_vertex( { symbol => 'C', number => 1 } ); $graph->add_vertex( { symbol => 'O', number => 2 } ); my $warning; local $SIG{__WARN__} = sub { $warning = $_[0] }; is( write_SMILES( [ $graph ] ), 'C' ); is( $warning, '1 unreachable atom(s) detected in moiety' . "\n" ); Chemistry-OpenSMILES-0.7.0/t/18_chirality_reference.t0000644000200400020040000000153614137221046022132 0ustar andriusandrius#!/usr/bin/perl use strict; use warnings; use Chemistry::OpenSMILES qw( is_chiral ); use Chemistry::OpenSMILES::Parser; use Test::More; my %cases = ( 'C[C@@](Br)(O)N' => 0, 'Br[C@@](N)(O)C' => 0, '[C@@](C)(Br)(O)N' => 1, '[C@@](Br)(N)(O)C' => 1, 'FC1C[C@](Br)(Cl)CCC1' => 2, '[C@]1(Br)(Cl)CCCC(F)C1' => 8, 'C1.[C@]1(Br)(Cl)O' => 0, 'C(CCCC1)[C@]1(Br)(Cl)' => 0, 'C([C@](Br)(Cl)O)C' => 0, ); plan tests => 3 * scalar keys %cases; for (sort keys %cases) { my $parser = Chemistry::OpenSMILES::Parser->new; my( $graph ) = $parser->parse( $_ ); my( $chiral_center ) = grep { is_chiral $_ } $graph->vertices; ok( defined $chiral_center ); ok( exists $chiral_center->{chirality_neighbours} ); is( $chiral_center->{chirality_neighbours}[0]{number}, $cases{$_} ); } Chemistry-OpenSMILES-0.7.0/t/01_atoms.t0000644000200400020040000000145214137221046017234 0ustar andriusandrius#!/usr/bin/perl use strict; use warnings; use Chemistry::OpenSMILES::Parser; use Chemistry::OpenSMILES::Writer qw( write_SMILES ); use Test::More; my @cases = qw( C N Cl * [U] [Pb] [He] [CH4] [ClH] [Cl-] [Cu+2] [13CH4] [2H+] [238U] ); my %cases = ( '[*]' => '*', '[ClH1]' => '[ClH]', '[Cu++]' => '[Cu+2]', '[OH-1]' => '[OH-]', '[OH1-]' => '[OH-]', '[C@TH1]' => '[C@TH1]', '[C@TH2]' => '[C@TH2]', map { $_ => $_ } @cases, ); plan tests => 2 * scalar keys %cases; for (sort keys %cases) { my $parser = Chemistry::OpenSMILES::Parser->new; my( $graph ) = $parser->parse( $_, { raw => 1 } ); is( $graph->vertices, 1 ); is( join( '', map { write_SMILES( $_ ) } $graph->vertices ), $cases{$_} ); } Chemistry-OpenSMILES-0.7.0/t/10_validate.t0000644000200400020040000000165414137221046017706 0ustar andriusandrius#!/usr/bin/perl use strict; use warnings; use Chemistry::OpenSMILES; use Chemistry::OpenSMILES::Parser; use Test::More; my %cases = ( '[C@]' => 'chiral center C(0) has 0 bonds while at least 4 is required', 'C/C(\O)=C(/C)(\O)' => 'atom C(1) has 2 bonds of type \'\\\', cis/trans definitions must not conflict', 'C(Cl)(F)(O)' => 'atom C(0) has 4 distinct neighbours, but does not have a chiral setting', 'C11' => 'atom C(0) has bond to itself', 'C/C' => 'cis/trans bond is defined between atoms C(0) and C(1), but neither of them is attached to a double bond', ); plan tests => scalar keys %cases; for (sort keys %cases) { my $warning; local $SIG{__WARN__} = sub { $warning = $_[0] }; my $parser = Chemistry::OpenSMILES::Parser->new; my( $graph ) = $parser->parse( $_ ); Chemistry::OpenSMILES::_validate( $graph ); $warning =~ s/\n$// if defined $warning; is( $warning, $cases{$_} ); } Chemistry-OpenSMILES-0.7.0/t/11_validate_color.t0000644000200400020040000000276014137221046021104 0ustar andriusandrius#!/usr/bin/perl use strict; use warnings; use Chemistry::OpenSMILES qw( clean_chiral_centers ); use Chemistry::OpenSMILES::Parser; use Test::More; my %cases = ( 'C[C@](C)(C)(C)' => 'tetrahedral chiral setting for C(1) is not needed as not all 4 neighbours are distinct', 'C[C@](Cl)(F)(O)' => undef, 'C(Cl)(F)(O)' => 'atom C(0) has 4 distinct neighbours, but does not have a chiral setting', 'N[C@@]12NC(N[C@]2(NC(N1))N)' => 'tetrahedral chiral setting for C(5) is not needed as not all 4 neighbours are distinct', ); plan tests => 3 * scalar keys %cases; for (sort keys %cases) { my $warning; local $SIG{__WARN__} = sub { $warning = $_[0] }; my $parser = Chemistry::OpenSMILES::Parser->new; my( $graph ) = $parser->parse( $_ ); Chemistry::OpenSMILES::_validate( $graph, sub { return $_[0]->{symbol} } ); $warning =~ s/\n$// if defined $warning; is( $warning, $cases{$_} ); # Unnecessary chiral centers should be removed my @affected = clean_chiral_centers( $graph, sub { return $_[0]->{symbol} } ); is( @affected != 0, defined $cases{$_} && $cases{$_} =~ /not needed/ ); # After removal, validation should pass undef $warning; Chemistry::OpenSMILES::_validate( $graph, sub { return $_[0]->{symbol} } ); $warning =~ s/\n$// if defined $warning; is( !defined $warning, !defined $cases{$_} || @affected != 0 ); } Chemistry-OpenSMILES-0.7.0/t/03_hcount.t0000644000200400020040000000217414137221046017415 0ustar andriusandrius#!/usr/bin/perl use strict; use warnings; use Chemistry::OpenSMILES::Parser; use Test::More; my %cases = ( 'C' => 5, 'C[U]' => 5, 'N' => 4, 'Cl' => 2, '[U]' => 1, '[Pb]' => 1, '[He]' => 1, '[*]' => 1, '[CH4]' => 5, '[ClH]' => 2, '[ClH1]' => 2, '[Cl-]' => 1, '[OH1-]' => 2, '[OH-1]' => 2, '[Cu+2]' => 1, '[Cu++]' => 1, '[13CH4]' => 5, '[2H+]' => 1, '[238U]' => 1, 'S' => 3, 'S[O]' => 3, 'S([O])([O])' => 3, 'S([O])([O])([O])' => 5, 'S([O])([O])([O])([O])' => 5, 'S([O])([O])([O])([O])([O])' => 7, 'O([O])' => 3, 'O([O])([O])' => 3, 'O([O])([O])([O])' => 4, 'CC' => 8, 'C-C' => 8, 'C=C' => 6, 'C#C' => 4, 'C$C' => 2, 'C1=CC=CC=C1' => 12, 'c1ccccc1' => 12, 'c1cncnc1' => 10, ); plan tests => scalar keys %cases; for (sort keys %cases) { my $parser = Chemistry::OpenSMILES::Parser->new; my( $graph ) = $parser->parse( $_ ); is( $graph->vertices, $cases{$_} ); } Chemistry-OpenSMILES-0.7.0/t/12_tetrahedral_chirality.t0000644000200400020040000000146214137221046022463 0ustar andriusandrius#!/usr/bin/perl use strict; use warnings; use Chemistry::OpenSMILES::Parser; use Chemistry::OpenSMILES::Writer qw(write_SMILES); use Test::More; my @cases = ( [ 'Br[C@H](Cl)(I)', 'Br([C@](Cl)(I)([H]))', '[H]([C@](I)(Cl)(Br))' ], ); plan tests => 2 * scalar @cases; for my $case (@cases) { my $parser; my @moieties; my $result; $parser = Chemistry::OpenSMILES::Parser->new; @moieties = $parser->parse( $case->[0] ); $result = write_SMILES( \@moieties ); is( $result, $case->[1] ); $result = write_SMILES( \@moieties, \&reverse_order ); is( $result, $case->[2] ); } sub reverse_order { my( $vertices ) = @_; my @sorted = sort { $vertices->{$b}{number} <=> $vertices->{$a}{number} } keys %$vertices; return $vertices->{shift @sorted}; } Chemistry-OpenSMILES-0.7.0/t/02_chains.t0000644000200400020040000000373514137221046017365 0ustar andriusandrius#!/usr/bin/perl use strict; use warnings; use List::Util qw(sum); use Chemistry::OpenSMILES::Parser; use Test::More; my %cases = ( 'CC' => [ 2, 1 ], 'CCO' => [ 3, 2 ], 'NCCCC' => [ 5, 4 ], 'CCCCN' => [ 5, 4 ], 'C=C' => [ 2, 1 ], 'C#N' => [ 2, 1 ], 'CC#CC' => [ 4, 3 ], 'CCC=O' => [ 4, 3 ], '[Rh-](Cl)(Cl)(Cl)(Cl)$[Rh-](Cl)(Cl)(Cl)Cl' => [ 10, 9 ], 'C-C' => [ 2, 1 ], 'CCC(CC)CO' => [ 7, 6 ], 'CC(C)C(=O)C(C)C' => [ 8, 7 ], 'OCC(CCC)C(C(C)C)CCC' => [ 13, 12 ], 'OS(=O)(=S)O' => [ 5, 4 ], 'C(C(C(C(C(C(C(C(C(C(C(C(C(C(C(C(C(C(C(C(C))))))))))))))))))))C' => [ 22, 21 ], 'C1CCCCC1' => [ 6, 6 ], 'N1CC2CCCCC2CC1' => [ 10, 11 ], 'C=1CCCCC=1' => [ 6, 6 ], 'C1CCCCC1C1CCCCC1' => [ 12, 13 ], 'C1CCCCC1C2CCCCC2' => [ 12, 13 ], 'C0CCCCC0' => [ 6, 6 ], 'C%25CCCCC%25' => [ 6, 6 ], 'C1CCCCC%01' => [ 6, 6 ], 'C12(CCCCC1)CCCCC2' => [ 11, 12 ], # The following case is not allowed by OpenSMILES specification, # however, it is easier to support it than forbid. 'C(CCCCC1)12CCCCC2' => [ 11, 12 ], '[Na+].[Cl-]' => [ 2, 2, 0 ], 'c1cc(O.NCCO)ccc1' => [ 2, 11, 10 ], 'Oc1cc(.NCCO)ccc1' => [ 2, 11, 10 ], 'C1.C1' => [ 1, 2, 1 ], 'C1.C12.C2' => [ 1, 3, 2 ], 'c1c2c3c4cc1.Br2.Cl3.Cl4' => [ 1, 9, 9 ], 'C(C(C1))C1' => [ 4, 4 ], # FIXME: self-bond is obviously incorrect and should be reported. 'C11' => [ 1, 1 ], ); plan tests => 3 * scalar keys %cases; for my $case (sort keys %cases) { my $parser = Chemistry::OpenSMILES::Parser->new; my @graphs = $parser->parse( $case, { raw => 1 } ); is( scalar @graphs, @{$cases{$case}} == 3 ? $cases{$case}->[0] : 1 ); is( sum( map { scalar $_->vertices } @graphs ), $cases{$case}->[-2] ); is( sum( map { scalar $_->edges } @graphs ), $cases{$case}->[-1] ); } Chemistry-OpenSMILES-0.7.0/t/16_unsupported_chirality.t0000644000200400020040000000162314137221046022557 0ustar andriusandrius#!/usr/bin/perl use strict; use warnings; use Chemistry::OpenSMILES::Parser; use Chemistry::OpenSMILES::Writer qw(write_SMILES); use Test::More; my @cases = ( [ 'N[C@](Br)(O)C', 'N([C@](Br)(O([H]))(C([H])([H])([H])))([H])([H])', undef ], [ 'NC(Br)=[C@]=C(O)C', 'N(C(Br)(=C(=C(O([H]))(C([H])([H])([H])))))([H])([H])', undef ], [ 'N[C@](Br)(O)(C)(Cl)', 'N(C(Br)(O([H]))(C([H])([H])([H]))(Cl))([H])([H])', 'chirality \'@\' observed for atom with 5 neighbours, can only process tetrahedral chiral centers' ], ); plan tests => 2 * scalar @cases; for my $case (@cases) { my $warning; local $SIG{__WARN__} = sub { $warning = $_[0] }; my $parser = Chemistry::OpenSMILES::Parser->new; my @moieties = $parser->parse( $case->[0] ); is( write_SMILES( \@moieties ), $case->[1] ); $warning =~ s/\n$// if $warning; is( $warning, $case->[2] ); } Chemistry-OpenSMILES-0.7.0/t/07_cistrans.t0000644000200400020040000000215114137221046017742 0ustar andriusandrius#!/usr/bin/perl use strict; use warnings; use Chemistry::OpenSMILES::Parser; use Chemistry::OpenSMILES::Writer qw(write_SMILES); use Test::More; my @cases = ( [ 'Br/C=C/F', 'Br(/C(=C(/F)))', 'F(\C(=C(\Br)))' ], [ 'C(\Br)=C/F', 'C(\Br)(=C(/F))', 'F(\C(=C(\Br)))' ], [ 'Br\C=C/F', 'Br(\C(=C(/F)))', 'F(\C(=C(/Br)))' ], [ 'C(/Br)=C/F', 'C(/Br)(=C(/F))', 'F(\C(=C(/Br)))' ], # Adapted from COD entry 1100225: [ 'Cl/C(=C\1COCN1)C', 'Cl(/C(=C\1(C(O(C(N/1)))))(C))', 'C(C(=C1(\N(C(O(C1)))))(\Cl))' ], ); plan tests => 2 * scalar @cases; for my $case (@cases) { my $parser; my @moieties; my $result; $parser = Chemistry::OpenSMILES::Parser->new; @moieties = $parser->parse( $case->[0], { raw => 1 } ); $result = write_SMILES( \@moieties ); is( $result, $case->[1] ); $result = write_SMILES( \@moieties, \&reverse_order ); is( $result, $case->[2] ); } sub reverse_order { my( $vertices ) = @_; my @sorted = sort { $vertices->{$b}{number} <=> $vertices->{$a}{number} } keys %$vertices; return $vertices->{shift @sorted}; } Chemistry-OpenSMILES-0.7.0/t/06_write.t0000644000200400020040000000274714137221046017260 0ustar andriusandrius#!/usr/bin/perl use strict; use warnings; use Chemistry::OpenSMILES::Parser; use Chemistry::OpenSMILES::Writer qw(write_SMILES); use Test::More; my @cases = ( [ 'C', 'C' ], [ 'C=C', 'C(=C)' ], [ 'C=1=C=C=C=1', 'C=1(=C(=C(=C=1)))' ], [ 'C#C.c1ccccc1', 'C(#C).c:1(:c(:c(:c(:c(:c:1)))))' ], [ 'C1CC2CCCCC2CC1', 'C1(C(C2(C(C(C(C(C2(C(C1)))))))))' ], # A strange way to write fused rings: [ 'C1(CCCCC11)(CCCC1)', 'C12(C(C(C(C(C1(C(C(C(C2)))))))))' ], # Single bonds between two aromatic atoms must be explicitly represented: [ 'c1cc-ccc1', 'c:1(:c(:c(-c(:c(:c:1)))))' ], # Chirality information is preserved: [ 'N[C@](Br)(O)C', 'N([C@](Br)(O)(C))' ], [ 'N[C@@](Br)(O)C', 'N([C@@](Br)(O)(C))' ], # A regression test for previously incorrectly identified aromatic bond: [ 'c1(c(cccc1)F)C(=O)[O-]', 'c:1(:c(:c(:c(:c(:c:1))))(F))(C(=O)([O-]))' ], # Cyclooctatetraene adapted from OpenSMILES v1.0 specification: [ 'C/1=C/C=C\C=C/C=C\1', 'C/1(=C(/C(=C(\C(=C(/C(=C\1)))))))' ], ); plan tests => 2 * scalar @cases; for my $case (@cases) { my $parser; my @moieties; my $result; $parser = Chemistry::OpenSMILES::Parser->new; @moieties = $parser->parse( $case->[0], { raw => 1 } ); $result = write_SMILES( \@moieties ); is( $result, $case->[1] ); $parser = Chemistry::OpenSMILES::Parser->new; @moieties = $parser->parse( $result, { raw => 1 } ); $result = write_SMILES( \@moieties ); is( $result, $case->[1] ); } Chemistry-OpenSMILES-0.7.0/lib/0000775000200400020040000000000014137221046015727 5ustar andriusandriusChemistry-OpenSMILES-0.7.0/lib/Chemistry/0000775000200400020040000000000014137221046017676 5ustar andriusandriusChemistry-OpenSMILES-0.7.0/lib/Chemistry/OpenSMILES.pm0000644000200400020040000002655214137221046022062 0ustar andriusandriuspackage Chemistry::OpenSMILES; use strict; use warnings; use 5.0100; # ABSTRACT: OpenSMILES format reader and writer our $VERSION = '0.7.0'; # VERSION require Exporter; our @ISA = qw( Exporter ); our @EXPORT_OK = qw( clean_chiral_centers is_aromatic is_chiral mirror ); use List::Util qw(any); sub is_chiral($); sub is_chiral_tetrahedral($); sub mirror($); # Removes chiral setting from tetrahedral chiral centers with less than # four distinct neighbours. Returns the affected atoms. # # CAVEAT: disregards anomers # TODO: check other chiral centers sub clean_chiral_centers($$) { my( $moiety, $color_sub ) = @_; my @affected; for my $atom ($moiety->vertices) { next unless is_chiral_tetrahedral( $atom ); my $hcount = exists $atom->{hcount} ? $atom->{hcount} : 0; next if $moiety->degree($atom) + $hcount != 4; my %colors = map { ($color_sub->( $_ ) => 1) } $moiety->neighbours($atom), ( { symbol => 'H' } ) x $hcount; next if scalar keys %colors == 4; delete $atom->{chirality}; push @affected, $atom; } return @affected; } sub is_aromatic($) { my( $atom ) = @_; return $atom->{symbol} ne ucfirst $atom->{symbol}; } sub is_chiral($) { my( $what ) = @_; if( ref $what eq 'HASH' ) { # Single atom return exists $what->{chirality}; } else { # Graph representing moiety return any { is_chiral( $_ ) } $what->vertices; } } sub is_chiral_tetrahedral($) { my( $what ) = @_; if( ref $what eq 'HASH' ) { # Single atom # CAVEAT: will fail for allenal configurations of @/@@ in raw mode return $what->{chirality} && $what->{chirality} =~ /^@@?$/ } else { # Graph representing moiety return any { is_chiral_tetrahedral( $_ ) } $what->vertices; } } sub mirror($) { my( $what ) = @_; if( ref $what eq 'HASH' ) { # Single atom # FIXME: currently dealing only with tetrahedral chiral centers if( is_chiral_tetrahedral( $what ) ) { $what->{chirality} = $what->{chirality} eq '@' ? '@@' : '@'; } } else { for ($what->vertices) { mirror( $_ ); } } } # CAVEAT: requires output from non-raw parsing due issue similar to GH#2 sub _validate($@) { my( $moiety, $color_sub ) = @_; for my $atom (sort { $a->{number} <=> $b->{number} } $moiety->vertices) { # TODO: AL chiral centers also have to be checked if( is_chiral_tetrahedral( $atom ) ) { if( $moiety->degree($atom) < 4 ) { # FIXME: tetrahedral allenes are false-positives warn sprintf 'chiral center %s(%d) has %d bonds while ' . 'at least 4 is required' . "\n", $atom->{symbol}, $atom->{number}, $moiety->degree($atom); } elsif( $moiety->degree($atom) == 4 && $color_sub ) { my %colors = map { ($color_sub->( $_ ) => 1) } $moiety->neighbours($atom); if( scalar keys %colors != 4 ) { # FIXME: anomers are false-positives, see COD entry # 7111036 warn sprintf 'tetrahedral chiral setting for %s(%d) ' . 'is not needed as not all 4 neighbours ' . 'are distinct' . "\n", $atom->{symbol}, $atom->{number}; } } } # Warn about unmarked tetrahedral chiral centers if( !is_chiral( $atom ) && $moiety->degree( $atom ) == 4 ) { my $color_sub_local = $color_sub; if( !$color_sub_local ) { $color_sub_local = sub { return $_[0]->{symbol} }; } my %colors = map { ($color_sub_local->( $_ ) => 1) } $moiety->neighbours($atom); if( scalar keys %colors == 4 ) { warn sprintf 'atom %s(%d) has 4 distinct neighbours, ' . 'but does not have a chiral setting' . "\n", $atom->{symbol}, $atom->{number}; } } } # FIXME: establish deterministic order for my $bond ($moiety->edges) { my( $A, $B ) = sort { $a->{number} <=> $b->{number} } @$bond; if( $A eq $B ) { warn sprintf 'atom %s(%d) has bond to itself' . "\n", $A->{symbol}, $A->{number}; } if( $moiety->has_edge_attribute( @$bond, 'bond' ) ) { my $bond_type = $moiety->get_edge_attribute( @$bond, 'bond' ); if( $bond_type eq '=' ) { # Test cis/trans bonds # FIXME: Not sure how to check which definition belongs to # which of the double bonds. See COD entry 1547257. for my $atom (@$bond) { my %bond_types = _neighbours_per_bond_type( $moiety, $atom ); foreach ('/', '\\') { if( $bond_types{$_} && @{$bond_types{$_}} > 1 ) { warn sprintf 'atom %s(%d) has %d bonds of type \'%s\', ' . 'cis/trans definitions must not conflict' . "\n", $atom->{symbol}, $atom->{number}, scalar @{$bond_types{$_}}, $_; } } } } elsif( $bond_type =~ /^[\\\/]$/ ) { # Test if next to a double bond. # FIXME: Yields false-positives for delocalised bonds, # see COD entry 1501863. # FIXME: What about triple bond? See COD entry 4103591. my %bond_types; for my $atom (@$bond) { my %bond_types_now = _neighbours_per_bond_type( $moiety, $atom ); for my $key (keys %bond_types_now) { push @{$bond_types{$key}}, @{$bond_types_now{$key}}; } } if( !$bond_types{'='} ) { warn sprintf 'cis/trans bond is defined between atoms ' . '%s(%d) and %s(%d), but neither of them ' . 'is attached to a double bond' . "\n", $A->{symbol}, $A->{number}, $B->{symbol}, $B->{number}; } } } } # TODO: SP, TB, OH chiral centers } sub _neighbours_per_bond_type { my( $moiety, $atom ) = @_; my %bond_types; for my $neighbour ($moiety->neighbours($atom)) { my $bond_type; if( $moiety->has_edge_attribute( $atom, $neighbour, 'bond' ) ) { $bond_type = $moiety->get_edge_attribute( $atom, $neighbour, 'bond' ); } else { $bond_type = ''; } if( $bond_type =~ /^[\\\/]$/ && $atom->{number} > $neighbour->{number} ) { $bond_type = $bond_type eq '\\' ? '/' : '\\'; } push @{$bond_types{$bond_type}}, $neighbour; } return %bond_types; } 1; __END__ =pod =head1 NAME Chemistry::OpenSMILES - OpenSMILES format reader and writer =head1 SYNOPSIS use Chemistry::OpenSMILES::Parser; my $parser = Chemistry::OpenSMILES::Parser->new; my @moieties = $parser->parse( 'C#C.c1ccccc1' ); $\ = "\n"; for my $moiety (@moieties) { # $moiety is a Graph::Undirected object print scalar $moiety->vertices; print scalar $moiety->edges; } use Chemistry::OpenSMILES::Writer qw(write_SMILES); print write_SMILES( \@moieties ); =head1 DESCRIPTION Chemistry::OpenSMILES provides support for SMILES chemical identifiers conforming to OpenSMILES v1.0 specification (L). Chemistry::OpenSMILES::Parser reads in SMILES strings and returns them parsed to arrays of L objects. Each atom is represented by a hash. Chemistry::OpenSMILES::Writer performs the inverse operation. Generated SMILES strings are by no means optimal. =head2 Molecular graph Disconnected parts of a compound are represented as separate L objects. Atoms are represented as vertices, and bonds are represented as edges. =head3 Atoms Atoms, or vertices of a molecular graph, are represented as hash references: { "symbol" => "C", "isotope" => 13, "chirality" => "@@", "hcount" => 3, "charge" => 1, "class" => 0, "number" => 0, } Except for C, C and C, all keys of hash are optional. Per OpenSMILES specification, default values for C and C are 0. For chiral atoms, the order of its neighbours in input is preserved in an array added as value for C key of the atom hash. =head3 Bonds Bonds, or edges of a molecular graph, rely completely on L internal representation. Bond orders other than single (C<->, which is also a default) are represented as values of edge attribute C. They correspond to the symbols used in OpenSMILES specification. =head2 Options C accepts the following options for key-value pairs in an anonymous hash for its second parameter: =over =item C In OpenSMILES specification the number of attached hydrogen atoms for atoms in square brackets is limited to 9. IUPAC SMILES+ has increased this number to 99. With the value of C the parser could be instructed to allow other than 1 digit for attached hydrogen count. =item C With C set to anything evaluating to false, the parser will not convert neither implicit nor explicit hydrogen atoms in square brackets to atom hashes of their own. Moreover, it will not attempt to unify the representations of chirality. It should be noted, though, that many of subroutines of Chemistry::OpenSMILES expect non-raw data structures, thus processing raw output may produce weird results. =back =head1 CAVEATS Element symbols in square brackets are not limited to the ones known to chemistry. Currently any single or two-letter symbol is allowed. Deprecated charge notations (C<--> and C<++>) are supported. OpenSMILES specification mandates a strict order of ring bonds and branches: branched_atom ::= atom ringbond* branch* Chemistry::OpenSMILES::Parser supports both the mandated, and inverted structure, where ring bonds follow branch descriptions. Whitespace is not supported yet. SMILES descriptors must be cleaned of it before attempting reading with Chemistry::OpenSMILES::Parser. The derivation of implicit hydrogen counts for aromatic atoms is not unambiguously defined in the OpenSMILES specification. Thus only aromatic carbon is accounted for as if having valence of 3. =head1 SEE ALSO perl(1) =head1 AUTHORS Andrius Merkys, Emerkys@cpan.orgE =cut Chemistry-OpenSMILES-0.7.0/lib/Chemistry/OpenSMILES/0000775000200400020040000000000014137221046021514 5ustar andriusandriusChemistry-OpenSMILES-0.7.0/lib/Chemistry/OpenSMILES/Parser.yp0000644000200400020040000003513714137221046023331 0ustar andriusandrius# Header section %{ use warnings; use 5.0100; use Chemistry::OpenSMILES qw( is_aromatic is_chiral ); use Graph::Undirected; use List::Util qw(any sum); my %normal_valence = ( B => [ 3 ], C => [ 4 ], N => [ 3, 5 ], O => [ 2 ], P => [ 3, 5 ], S => [ 2, 4, 6 ], F => [ 1 ], Cl => [ 1 ], Br => [ 1 ], I => [ 1 ], c => [ 3 ], ); my %bond_order = ( '-' => 1, '=' => 2, '#' => 3, '$' => 4, ); %} %% # Rules section # The top-level 'filter' rule smiles: chain ; chain: atom { my $g = Graph::Undirected->new( refvertexed => 1 ); $g->add_vertex( $_[1] ); push @{$_[0]->{USER}{GRAPHS}}, $g; $_[1]->{graph} = $g; $_[1]->{index} = @{$_[0]->{USER}{GRAPHS}}-1; $_[1]->{first_of_chain} = 1; return { first => $_[1], last => $_[1] }; } | chain atom { $_[2]->{graph} = $_[1]->{last}{graph}; $_[2]->{index} = $_[1]->{last}{index}; $_[2]->{graph}->add_edge( $_[1]->{last}, $_[2] ); if( is_aromatic $_[1]->{last} && is_aromatic $_[2] ) { $_[2]->{graph}->set_edge_attribute( $_[1]->{last}, $_[2], 'bond', ':' ); } delete $_[2]->{first_of_chain}; _push_chirality_neighbour( $_[1]->{last}, $_[2] ); _push_chirality_neighbour( $_[2], $_[1]->{last} ); $_[1]->{last} = $_[2]; return $_[1]; } | chain bond atom { $_[3]->{graph} = $_[1]->{last}{graph}; $_[3]->{index} = $_[1]->{last}{index}; if( $_[2] ne '-' ) { $_[3]->{graph}->set_edge_attribute( $_[1]->{last}, $_[3], 'bond', $_[2] ); } else { $_[3]->{graph}->add_edge( $_[1]->{last}, $_[3] ); } delete $_[3]->{first_of_chain}; _push_chirality_neighbour( $_[1]->{last}, $_[3] ); _push_chirality_neighbour( $_[3], $_[1]->{last} ); $_[1]->{last} = $_[3]; return $_[1]; } | chain '.' atom { my $g = Graph::Undirected->new( refvertexed => 1 ); $g->add_vertex( $_[3] ); push @{$_[0]->{USER}{GRAPHS}}, $g; $_[3]->{graph} = $g; $_[3]->{index} = @{$_[0]->{USER}{GRAPHS}}-1; $_[3]->{first_of_chain} = 1; return { first => $_[3], last => $_[3] }; } | chain '(' chain ')' { if( $_[1]->{last}{index} != $_[3]->{first}{index} ) { $_[0]->_merge_graphs( $_[1]->{last}{index}, $_[3]->{first}{index} ); } $_[1]->{last}{graph}->add_edge( $_[1]->{last}, $_[3]->{first} ); if( is_aromatic $_[1]->{last} && is_aromatic $_[3]->{first} ) { $_[1]->{last}{graph}->set_edge_attribute( $_[1]->{last}, $_[3]->{first}, 'bond', ':' ); } delete $_[3]->{first}{first_of_chain}; _push_chirality_neighbour( $_[1]->{last}, $_[3]->{first} ); _unshift_chirality_neighbour( $_[3]->{first}, $_[1]->{last} ); return $_[1]; } | chain '(' bond chain ')' { if( $_[1]->{last}{index} != $_[4]->{first}{index} ) { $_[0]->_merge_graphs( $_[1]->{last}{index}, $_[4]->{first}{index} ); } if( $_[3] ne '-' ) { $_[1]->{last}{graph}->set_edge_attribute( $_[1]->{last}, $_[4]->{first}, 'bond', $_[3] ); } else { $_[1]->{last}{graph}->add_edge( $_[1]->{last}, $_[4]->{first} ); } delete $_[4]->{first}{first_of_chain}; _push_chirality_neighbour( $_[1]->{last}, $_[4]->{first} ); _unshift_chirality_neighbour( $_[4]->{first}, $_[1]->{last} ); return $_[1]; } | chain '(' '.' chain ')' # According to the specification of OpenSMILES, ring bonds are # allowed only before the branch enumeration. However, I think this # is too strict. | chain ringbond { $_[0]->_add_ring_bond( $_[1]->{last}, $_[2] ); return $_[1]; } | chain bond ringbond { $_[0]->_add_ring_bond( $_[1]->{last}, $_[3], $_[2] ); return $_[1]; } ; bond: '-' | '=' | '#' | '$' | ':' | '/' | '\\' ; %% # Footer section sub _Error { my( $self ) = @_; close $self->{USER}{FILEIN} if $self->{USER}{FILEIN}; if( ${$self->{TOKEN}} eq '' && grep { defined $_ && !ref $_ && $_ eq '(' } map { $_->[1] } @{$self->{STACK}} ) { die "$0: syntax error: missing closing parenthesis.\n"; } if( ${$self->{TOKEN}} eq ')' ) { die "$0: syntax error: unbalanced parentheses.\n"; } my $msg = "$0: syntax error at position $self->{USER}{CHARNO}"; if( $self->YYData->{INPUT} ) { $self->YYData->{INPUT} =~ s/\n$//; die "$msg: '" . $self->YYData->{INPUT} . "'.\n"; } else { die "$msg.\n"; } } sub _Lexer { my( $self ) = @_; # If the line is empty and the input is originating from the file, # another line is read. if( !$self->YYData->{INPUT} && $self->{USER}{FILEIN} ) { my $filein = $self->{USER}{FILEIN}; $self->YYData->{INPUT} = <$filein>; $self->{USER}{CHARNO} = 0; } if( $self->YYData->{INPUT} =~ s/^(\s+)// ) { $self->{USER}{CHARNO} += length $1; } my $hcount_re = 'H[0-9]?'; if( defined $self->{USER}{OPTIONS}{max_hydrogen_count_digits} ) { $hcount_re = sprintf 'H[0-9]{0,%d}', $self->{USER}{OPTIONS}{max_hydrogen_count_digits}; } # Bracket atoms if( $self->YYData->{INPUT} =~ s/^\[ (?[0-9]+)? (?[A-Za-z][a-z]?|\*) (?@( (TH|AL)[12] | SP [123] | (TB|OH)[0-9]{1,2} | @? ))? (? $hcount_re)? (?--|\+\+|[-+][0-9]{0,2})? (:(?[0-9]+))? \]//x ) { my $atom = { %+, number => $self->{USER}{ATOMNO} }; $self->{USER}{ATOMNO} ++; $self->{USER}{CHARNO} += length $&; if( $atom->{charge} ) { $atom->{charge} =~ s/^([-+])$/${1}1/; $atom->{charge} =~ s/^([-+])\1$/${1}2/; $atom->{charge} = int $atom->{charge}; } if( $atom->{hcount} ) { $atom->{hcount} =~ s/^H//; $atom->{hcount} = $atom->{hcount} ? int $atom->{hcount} : 1; } else { $atom->{hcount} = 0; } if( $atom->{isotope} ) { $atom->{isotope} = int $atom->{isotope}; } # Atom class is an arbitrary number, 0 by default $atom->{class} = exists $atom->{class} ? int $atom->{class} : 0; return ( 'atom', $atom ); } # Bracketless atoms if( $self->YYData->{INPUT} =~ s/^(Br|Cl|[BCINOPSFbcnops*])// ) { my $atom = { symbol => $1, class => 0, number => $self->{USER}{ATOMNO} }; $self->{USER}{ATOMNO} ++; $self->{USER}{CHARNO} += length $&; return ( 'atom', $atom ); } # Ring bonds if( $self->YYData->{INPUT} =~ s/^%([0-9]{2})// || $self->YYData->{INPUT} =~ s/^([0-9])// ) { $self->{USER}{CHARNO} += length $&; return ( 'ringbond', int $1 ); } my $char = substr( $self->YYData->{INPUT}, 0, 1 ); if( $char ne '' ) { $self->YYData->{INPUT} = substr( $self->YYData->{INPUT}, 1 ); } $self->{USER}{CHARNO} ++; return( $char, $char ); } sub parse { my( $self, $string, $options ) = @_; $options = {} unless $options; $self->YYData->{INPUT} = $string; $self->{USER}{GRAPHS} = []; $self->{USER}{RINGBONDS} = {}; $self->{USER}{ATOMNO} = 0; $self->{USER}{CHARNO} = 0; $self->{USER}{OPTIONS} = $options; $self->YYParse( yylex => \&_Lexer, yyerror => \&_Error, yydebug => $options->{debug} ); if( scalar keys %{$self->{USER}{RINGBONDS}} ) { die "$0: unclosed ring bond(s) detected: " . join( ', ', sort { $a <=> $b } keys %{$self->{USER}{RINGBONDS}} ) . ".\n"; } my @graphs = grep { defined } @{$self->{USER}{GRAPHS}}; for my $graph (@graphs) { for my $atom (sort { $a->{number} <=> $b->{number} } $graph->vertices) { delete $atom->{graph}; delete $atom->{index}; if( !$options->{raw} ) { # Promote implicit hydrogen atoms into explicit ones if( !exists $atom->{hcount} ) { next if !exists $normal_valence{$atom->{symbol}}; my $degree = sum map { exists $bond_order{$_} ? $bond_order{$_} : 1 } map { $graph->has_edge_attribute( $atom, $_, 'bond' ) ? $graph->get_edge_attribute( $atom, $_, 'bond' ) : '-' } $graph->neighbours( $atom ); $degree = 0 unless $degree; my( $valence ) = grep { $degree <= $_ } @{$normal_valence{$atom->{symbol}}}; next if !defined $valence; $atom->{hcount} = $valence - $degree; } for (1..$atom->{hcount}) { my $hydrogen = { symbol => 'H', class => 0, number => $self->{USER}{ATOMNO} }; $graph->add_edge( $atom, $hydrogen ); $self->{USER}{ATOMNO} ++; if( $atom->{first_of_chain} ) { _unshift_chirality_neighbour( $atom, $hydrogen ); } else { _push_chirality_neighbour( $atom, $hydrogen ); } } delete $atom->{hcount}; # Unify the representation of chirality if( is_chiral $atom ) { if( $atom->{chirality} =~ /^@@?$/ && $graph->degree( $atom ) == 2 ) { $atom->{chirality} =~ s/@+/'@AL' . length $&/e; } $atom->{chirality} =~ s/^\@TH1$/@/; $atom->{chirality} =~ s/^\@TH2$/@@/; } } delete $atom->{first_of_chain}; } } return @graphs; } sub _add_ring_bond { my( $self, $atom, $ring_bond, $bond ) = @_; if( $self->{USER}{RINGBONDS}{$ring_bond} ) { $self->_merge_graphs( $self->{USER}{RINGBONDS}{$ring_bond}{atom}{index}, $atom->{index} ); if( $bond && $self->{USER}{RINGBONDS}{$ring_bond}{bond} && (($bond !~ /^[\\\/]$/ && $bond ne $self->{USER}{RINGBONDS}{$ring_bond}{bond}) || ($bond eq '\\' && $self->{USER}{RINGBONDS}{$ring_bond}{bond} ne '/') || ($bond eq '/' && $self->{USER}{RINGBONDS}{$ring_bond}{bond} ne '\\')) ) { die "$0: ring bond types for ring bond $ring_bond do not match.\n"; } ( $bond ) = grep { defined } ( $self->{USER}{RINGBONDS}{$ring_bond}{bond}, $bond ); my $ring_atom = $self->{USER}{RINGBONDS}{$ring_bond}{atom}; if( !$bond && is_aromatic $ring_atom && is_aromatic $atom ) { $bond = ':'; } if( $bond && $bond ne '-' ) { $atom->{graph}->set_edge_attribute( $ring_atom, $atom, 'bond', $bond ); } else { $atom->{graph}->add_edge( $ring_atom, $atom ); } delete $self->{USER}{RINGBONDS}{$ring_bond}; if( is_chiral $ring_atom && $ring_atom->{chirality_neighbours} ) { my( $pos ) = grep { !ref $ring_atom->{chirality_neighbours}[$_] && $ring_atom->{chirality_neighbours}[$_] == $ring_bond } 0..$#{$ring_atom->{chirality_neighbours}}; $ring_atom->{chirality_neighbours}[$pos] = $atom if defined $pos; } _push_chirality_neighbour( $atom, $ring_atom ); } else { $self->{USER}{RINGBONDS}{$ring_bond} = { atom => $atom, $bond ? ( bond => $bond ) : () }; # Record a placeholder for later addition of real chirality # neighbour, which will be identified by the ring bond number _push_chirality_neighbour( $atom, $ring_bond ); } } sub _merge_graphs { my( $self, $index1, $index2 ) = @_; return if $index1 == $index2; my $g1 = $self->{USER}{GRAPHS}[$index1]; my $g2 = $self->{USER}{GRAPHS}[$index2]; for ($g2->vertices) { $_->{graph} = $g1; $_->{index} = $index1; } $g1->add_vertices( $g2->vertices ); for ($g2->edges) { my $attributes = $g2->get_edge_attributes( @$_ ); if( $attributes ) { $g1->set_edge_attributes( @$_, $attributes ); } else { $g1->add_edge( @$_ ); } } $self->{USER}{GRAPHS}[$index2] = undef; } sub _push_chirality_neighbour { my( $atom1, $atom2 ) = @_; return unless is_chiral $atom1; push @{$atom1->{chirality_neighbours}}, $atom2; } sub _unshift_chirality_neighbour { my( $atom1, $atom2 ) = @_; return unless is_chiral $atom1; unshift @{$atom1->{chirality_neighbours}}, $atom2; } 1; Chemistry-OpenSMILES-0.7.0/lib/Chemistry/OpenSMILES/Writer.pm0000644000200400020040000002172014137221046023326 0ustar andriusandriuspackage Chemistry::OpenSMILES::Writer; use strict; use warnings; use Chemistry::OpenSMILES qw( is_aromatic is_chiral ); use Chemistry::OpenSMILES::Parser; use Graph::Traversal::DFS; use List::Util qw(all uniq); # ABSTRACT: OpenSMILES format writer our $VERSION = '0.7.0'; # VERSION require Exporter; our @ISA = qw( Exporter ); our @EXPORT_OK = qw( write_SMILES ); sub write_SMILES { my( $what, $order_sub ) = @_; if( ref $what eq 'HASH' ) { # subroutine will also accept and properly represent a single # atom: return _pre_vertex( $what ); } my @moieties = ref $what eq 'ARRAY' ? @$what : ( $what ); my @components; $order_sub = \&_order unless $order_sub; for my $graph (@moieties) { my @symbols; my %vertex_symbols; my $nrings = 0; my %seen_rings; my @chiral; my %discovered_from; my $rings = {}; my $operations = { tree_edge => sub { my( $seen, $unseen, $self ) = @_; if( $vertex_symbols{$unseen} ) { ( $seen, $unseen ) = ( $unseen, $seen ); } push @symbols, _tree_edge( $seen, $unseen, $self, $order_sub ); $discovered_from{$unseen} = $seen }, non_tree_edge => sub { my @sorted = sort { $vertex_symbols{$a} <=> $vertex_symbols{$b} } @_[0..1]; $rings->{$vertex_symbols{$sorted[0]}} {$vertex_symbols{$sorted[1]}} = _depict_bond( @sorted, $graph ) }, pre => sub { my( $vertex, $dfs ) = @_; push @chiral, $vertex if is_chiral $vertex; push @symbols, _pre_vertex( { map { $_ => $vertex->{$_} } grep { $_ ne 'chirality' } keys %$vertex } ); $vertex_symbols{$vertex} = $#symbols }, post => sub { push @symbols, ')' }, next_root => undef, }; if( $order_sub ) { $operations->{first_root} = sub { return $order_sub->( $_[1], $_[0]->graph ) }; $operations->{next_successor} = sub { return $order_sub->( $_[1], $_[0]->graph ) }; } my $traversal = Graph::Traversal::DFS->new( $graph, %$operations ); $traversal->dfs; if( scalar keys %vertex_symbols != scalar $graph->vertices ) { warn scalar( $graph->vertices ) - scalar( keys %vertex_symbols ) . ' unreachable atom(s) detected in moiety' . "\n"; } next unless @symbols; pop @symbols; # Dealing with chirality for my $atom (@chiral) { next unless $atom->{chirality} =~ /^@@?$/; my @neighbours = $graph->neighbours($atom); if( scalar @neighbours != 4 ) { # TODO: process also configurations other than tetrahedral warn "chirality '$atom->{chirality}' observed for atom " . 'with ' . scalar @neighbours . ' neighbours, can only ' . 'process tetrahedral chiral centers' . "\n"; next; } my $chirality_now = $atom->{chirality}; if( $atom->{chirality_neighbours} ) { my %indices; for (0..$#{$atom->{chirality_neighbours}}) { $indices{$vertex_symbols{$atom->{chirality_neighbours}[$_]}} = $_; } my @order_new; # In newly established order, the atom from which this one # is discovered (left hand side) will be the first, if any if( $discovered_from{$atom} ) { push @order_new, $vertex_symbols{$discovered_from{$atom}}; } # Second, there will be ring bonds as they are added the # first of all the neighbours if( $rings->{$vertex_symbols{$atom}} ) { push @order_new, sort { $a <=> $b } keys %{$rings->{$vertex_symbols{$atom}}}; } # Finally, all neighbours are added, uniq will remove duplicates push @order_new, sort { $a <=> $b } map { $vertex_symbols{$_} } @neighbours; @order_new = uniq @order_new; if( join( '', _permutation_order( map { $indices{$_} } @order_new ) ) ne '0123' ) { $chirality_now = $chirality_now eq '@' ? '@@' : '@'; } } my $parser = Chemistry::OpenSMILES::Parser->new; my( $graph_reparsed ) = $parser->parse( $symbols[$vertex_symbols{$atom}], { raw => 1 } ); my( $atom_reparsed ) = $graph_reparsed->vertices; $atom_reparsed->{chirality} = $chirality_now; $symbols[$vertex_symbols{$atom}] = write_SMILES( $atom_reparsed ); } # Adding ring numbers my @ring_ids = ( 1..99, 0 ); my @ring_ends; for my $i (0..$#symbols) { if( $rings->{$i} ) { for my $j (sort { $a <=> $b } keys %{$rings->{$i}}) { if( !@ring_ids ) { # All 100 rings are open now. There is no other # solution but to terminate the program. die 'cannot represent more than 100 open ring' . ' bonds'; } $symbols[$i] .= $rings->{$i}{$j} . ($ring_ids[0] < 10 ? '' : '%') . $ring_ids[0]; $symbols[$j] .= ($rings->{$i}{$j} eq '/' ? '\\' : $rings->{$i}{$j} eq '\\' ? '/' : $rings->{$i}{$j}) . ($ring_ids[0] < 10 ? '' : '%') . $ring_ids[0]; push @{$ring_ends[$j]}, shift @ring_ids; } } if( $ring_ends[$i] ) { # Ring bond '0' must stay in the end @ring_ids = sort { ($a == 0) - ($b == 0) || $a <=> $b } (@{$ring_ends[$i]}, @ring_ids); } } push @components, join '', @symbols; } return join '.', @components; } # DEPRECATED sub write { &write_SMILES; } sub _tree_edge { my( $u, $v, $self ) = @_; return '(' . _depict_bond( $u, $v, $self->graph ); } sub _pre_vertex { my( $vertex ) = @_; my $atom = $vertex->{symbol}; my $is_simple = $atom =~ /^[bcnosp]$/i || $atom =~ /^(F|Cl|Br|I|\*)$/; if( exists $vertex->{isotope} ) { $atom = $vertex->{isotope} . $atom; $is_simple = 0; } if( is_chiral $vertex ) { $atom .= $vertex->{chirality}; $is_simple = 0; } if( $vertex->{hcount} ) { # if non-zero $atom .= 'H' . ($vertex->{hcount} == 1 ? '' : $vertex->{hcount}); $is_simple = 0; } if( $vertex->{charge} ) { # if non-zero $atom .= ($vertex->{charge} > 0 ? '+' : '') . $vertex->{charge}; $atom =~ s/([-+])1$/$1/; $is_simple = 0; } if( $vertex->{class} ) { # if non-zero $atom .= ':' . $vertex->{class}; $is_simple = 0; } return $is_simple ? $atom : "[$atom]"; } sub _depict_bond { my( $u, $v, $graph ) = @_; if( !$graph->has_edge_attribute( $u, $v, 'bond' ) ) { return is_aromatic $u && is_aromatic $v ? '-' : ''; } my $bond = $graph->get_edge_attribute( $u, $v, 'bond' ); return $bond if $bond ne '/' && $bond ne '\\'; return $bond if $u->{number} < $v->{number}; return $bond eq '/' ? '\\' : '/'; } # Reorder a permutation of elements 0, 1, 2 and 3 by taking an element # and moving it two places either forward or backward in the line. This # subroutine is used to check whether a sign change of tetragonal # chirality is required or not. sub _permutation_order { # Safeguard against endless cycles due to undefined values return unless scalar @_ == 4; return unless all { defined } @_; while( $_[2] == 0 || $_[3] == 0 ) { @_ = ( $_[0], @_[2..3], $_[1] ); } if( $_[0] != 0 ) { @_ = ( @_[1..2], $_[0], $_[3] ); } while( $_[1] != 1 ) { @_[1..3] = ( @_[2..3], $_[1] ); } return @_; } sub _order { my( $vertices ) = @_; my @sorted = sort { $vertices->{$a}{number} <=> $vertices->{$b}{number} } keys %$vertices; return $vertices->{shift @sorted}; } 1; Chemistry-OpenSMILES-0.7.0/META.json0000644000200400020040000000301414137221046016576 0ustar andriusandrius{ "abstract" : "OpenSMILES format reader and writer", "author" : [ "Andrius Merkys " ], "dynamic_config" : 0, "generated_by" : "Dist::Zilla version 6.010, CPAN::Meta::Converter version 2.150010", "license" : [ "bsd" ], "meta-spec" : { "url" : "http://search.cpan.org/perldoc?CPAN::Meta::Spec", "version" : 2 }, "name" : "Chemistry-OpenSMILES", "prereqs" : { "build" : { "requires" : { "Module::Build" : "0.28", "Module::Build::Parse::Yapp" : "v0.1.2" } }, "configure" : { "requires" : { "Module::Build" : "0.28", "Module::Build::Parse::Yapp" : "v0.1.2" } }, "runtime" : { "requires" : { "Graph" : "0.97", "List::Util" : "1.45", "Parse::Yapp" : "0", "perl" : "5.010000" } }, "test" : { "requires" : { "Test::More" : "0" } } }, "release_status" : "stable", "resources" : { "bugtracker" : { "web" : "https://github.com/merkys/chemistry-opensmiles/issues" }, "homepage" : "https://search.cpan.org/dist/Chemistry-OpenSMILES", "repository" : { "type" : "git", "url" : "git://github.com/merkys/chemistry-opensmiles.git", "web" : "https://github.com/merkys/chemistry-opensmiles" } }, "version" : "0.7.0", "x_serialization_backend" : "Cpanel::JSON::XS version 3.0239" }