swaml-0.1.1/ 0000755 0001750 0001750 00000000000 11362031135 012171 5 ustar sergio sergio swaml-0.1.1/includes/ 0000755 0001750 0001750 00000000000 11362031135 013777 5 ustar sergio sergio swaml-0.1.1/config/ 0000755 0001750 0001750 00000000000 11362031135 013436 5 ustar sergio sergio swaml-0.1.1/setup/ 0000755 0001750 0001750 00000000000 11362031135 013331 5 ustar sergio sergio swaml-0.1.1/manpages/ 0000755 0001750 0001750 00000000000 11362031135 013764 5 ustar sergio sergio swaml-0.1.1/src/ 0000755 0001750 0001750 00000000000 11362031135 012760 5 ustar sergio sergio swaml-0.1.1/test/ 0000755 0001750 0001750 00000000000 11362031134 013147 5 ustar sergio sergio swaml-0.1.1/includes/ui/ 0000755 0001750 0001750 00000000000 11362031135 014414 5 ustar sergio sergio swaml-0.1.1/includes/apache/ 0000755 0001750 0001750 00000000000 11362031135 015220 5 ustar sergio sergio swaml-0.1.1/src/swaml/ 0000755 0001750 0001750 00000000000 11362031135 014103 5 ustar sergio sergio swaml-0.1.1/test/swaml/ 0000755 0001750 0001750 00000000000 11362031135 014273 5 ustar sergio sergio swaml-0.1.1/includes/ui/web/ 0000755 0001750 0001750 00000000000 11362031135 015171 5 ustar sergio sergio swaml-0.1.1/includes/ui/text/ 0000755 0001750 0001750 00000000000 11362031135 015400 5 ustar sergio sergio swaml-0.1.1/src/swaml/ui/ 0000755 0001750 0001750 00000000000 11362031135 014520 5 ustar sergio sergio swaml-0.1.1/src/swaml/rdf/ 0000755 0001750 0001750 00000000000 11362031135 014656 5 ustar sergio sergio swaml-0.1.1/src/swaml/common/ 0000755 0001750 0001750 00000000000 11362031135 015373 5 ustar sergio sergio swaml-0.1.1/src/swaml/storage/ 0000755 0001750 0001750 00000000000 11362031135 015547 5 ustar sergio sergio swaml-0.1.1/src/swaml/mail/ 0000755 0001750 0001750 00000000000 11362031135 015025 5 ustar sergio sergio swaml-0.1.1/test/swaml/ui/ 0000755 0001750 0001750 00000000000 11362031135 014710 5 ustar sergio sergio swaml-0.1.1/test/swaml/rdf/ 0000755 0001750 0001750 00000000000 11362031135 
015046 5 ustar sergio sergio swaml-0.1.1/test/swaml/common/ 0000755 0001750 0001750 00000000000 11362031135 015563 5 ustar sergio sergio swaml-0.1.1/test/swaml/storage/ 0000755 0001750 0001750 00000000000 11362031134 015736 5 ustar sergio sergio swaml-0.1.1/test/swaml/mail/ 0000755 0001750 0001750 00000000000 11362031134 015214 5 ustar sergio sergio swaml-0.1.1/includes/ui/text/usage/ 0000755 0001750 0001750 00000000000 11362031135 016504 5 ustar sergio sergio swaml-0.1.1/src/swaml/rdf/sioc/ 0000755 0001750 0001750 00000000000 11362031135 015613 5 ustar sergio sergio swaml-0.1.1/test/swaml/rdf/sioc/ 0000755 0001750 0001750 00000000000 11362031135 016003 5 ustar sergio sergio swaml-0.1.1/Makefile 0000644 0001750 0001750 00000002124 10774155402 013642 0 ustar sergio sergio # SWAML Makefile
# Project name, used as the documentation title and in the test banner.
NAME := SWAML
# Staging root prepended to every installation path (empty by default).
DESTDIR :=
# Compressor used for the manual page.
ZIP := gzip
# Directory that receives the generated API documentation.
DOCDIR := doc
# Python interpreter referenced by the test commands.
PYTHON := python
.PHONY: install
# Install the launcher script, the program files, the example configurations
# and the compressed manual page below $(DESTDIR).
# Every destination directory is created first so that installing into an
# empty staging root works, and the manual page is compressed straight into
# place instead of going through a fixed ./tmp scratch directory.
install:
	mkdir -p $(DESTDIR)/usr/bin
	cp setup/swaml $(DESTDIR)/usr/bin/swaml
	chmod 755 $(DESTDIR)/usr/bin/swaml
	mkdir -p $(DESTDIR)/usr/share/swaml/
	cp -r __init__.py run.py src includes $(DESTDIR)/usr/share/swaml/
	mkdir -p $(DESTDIR)/usr/share/doc/swaml/examples
	cp -r config $(DESTDIR)/usr/share/doc/swaml/examples
	mkdir -p $(DESTDIR)/usr/share/man/man1
	$(ZIP) -9 -c manpages/swaml.1 > $(DESTDIR)/usr/share/man/man1/swaml.1.gz
.PHONY: uninstall
# Remove the files and directories that the install target copies below
# $(DESTDIR). Declared phony so a stray file named "uninstall" cannot
# disable the target.
uninstall:
	rm -f $(DESTDIR)/usr/bin/swaml
	rm -rf $(DESTDIR)/usr/share/swaml
	rm -rf $(DESTDIR)/usr/share/doc/swaml/examples
	rm -f $(DESTDIR)/usr/share/man/man1/swaml.1.gz
.PHONY: gendoc
# Rebuild the HTML API documentation from scratch with epydoc.
# Declared phony so it always runs, even if a file named "gendoc" exists.
gendoc:
	rm -rf $(DOCDIR)
	mkdir -p $(DOCDIR)
	epydoc -n $(NAME) -o $(DOCDIR) --html __init__.py run.py src/swaml/
.PHONY: pubdoc
# Regenerate the documentation and copy it to the project web space on
# shell.berlios.de, logging in as $(USER).
pubdoc: gendoc
	scp -r $(DOCDIR)/* $(USER)@shell.berlios.de:/home/groups/swaml/htdocs/$(DOCDIR)/
.PHONY: tests
# Print the unit test banner. The individual test scripts are currently
# disabled; they are kept below as make comments so they are neither run nor
# echoed.
tests:
	@echo $(NAME) unittests
	@echo ---------------
#$(PYTHON) test/swaml/rdf/swse.py
#$(PYTHON) test/swaml/rdf/sindice.py
.PHONY: clean
# Remove generated output: the archive/ directory, the generated
# documentation, editor backup files and compiled Python files.
# find deletes the matches itself, so file names containing spaces are
# handled correctly and directories are never handed to rm -f.
clean:
	rm -rf archive/
	rm -rf $(DOCDIR)
	find . ! -type d -name "*~" -exec rm -f {} +
	find . ! -type d -name "*.pyc" -exec rm -f {} +
swaml-0.1.1/COPYING 0000644 0001750 0001750 00000035427 10435617232 013247 0 ustar sergio sergio GNU GENERAL PUBLIC LICENSE
Version 2, June 1991
Copyright (C) 1989, 1991 Free Software Foundation, Inc.
59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
Everyone is permitted to copy and distribute verbatim copies
of this license document, but changing it is not allowed.
Preamble
The licenses for most software are designed to take away your
freedom to share and change it. By contrast, the GNU General Public
License is intended to guarantee your freedom to share and change free
software--to make sure the software is free for all its users. This
General Public License applies to most of the Free Software
Foundation's software and to any other program whose authors commit to
using it. (Some other Free Software Foundation software is covered by
the GNU Library General Public License instead.) You can apply it to
your programs, too.
When we speak of free software, we are referring to freedom, not
price. Our General Public Licenses are designed to make sure that you
have the freedom to distribute copies of free software (and charge for
this service if you wish), that you receive source code or can get it
if you want it, that you can change the software or use pieces of it
in new free programs; and that you know you can do these things.
To protect your rights, we need to make restrictions that forbid
anyone to deny you these rights or to ask you to surrender the rights.
These restrictions translate to certain responsibilities for you if you
distribute copies of the software, or if you modify it.
For example, if you distribute copies of such a program, whether
gratis or for a fee, you must give the recipients all the rights that
you have. You must make sure that they, too, receive or can get the
source code. And you must show them these terms so they know their
rights.
We protect your rights with two steps: (1) copyright the software, and
(2) offer you this license which gives you legal permission to copy,
distribute and/or modify the software.
Also, for each author's protection and ours, we want to make certain
that everyone understands that there is no warranty for this free
software. If the software is modified by someone else and passed on, we
want its recipients to know that what they have is not the original, so
that any problems introduced by others will not reflect on the original
authors' reputations.
Finally, any free program is threatened constantly by software
patents. We wish to avoid the danger that redistributors of a free
program will individually obtain patent licenses, in effect making the
program proprietary. To prevent this, we have made it clear that any
patent must be licensed for everyone's free use or not licensed at all.
The precise terms and conditions for copying, distribution and
modification follow.
GNU GENERAL PUBLIC LICENSE
TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
0. This License applies to any program or other work which contains
a notice placed by the copyright holder saying it may be distributed
under the terms of this General Public License. The "Program", below,
refers to any such program or work, and a "work based on the Program"
means either the Program or any derivative work under copyright law:
that is to say, a work containing the Program or a portion of it,
either verbatim or with modifications and/or translated into another
language. (Hereinafter, translation is included without limitation in
the term "modification".) Each licensee is addressed as "you".
Activities other than copying, distribution and modification are not
covered by this License; they are outside its scope. The act of
running the Program is not restricted, and the output from the Program
is covered only if its contents constitute a work based on the
Program (independent of having been made by running the Program).
Whether that is true depends on what the Program does.
1. You may copy and distribute verbatim copies of the Program's
source code as you receive it, in any medium, provided that you
conspicuously and appropriately publish on each copy an appropriate
copyright notice and disclaimer of warranty; keep intact all the
notices that refer to this License and to the absence of any warranty;
and give any other recipients of the Program a copy of this License
along with the Program.
You may charge a fee for the physical act of transferring a copy, and
you may at your option offer warranty protection in exchange for a fee.
2. You may modify your copy or copies of the Program or any portion
of it, thus forming a work based on the Program, and copy and
distribute such modifications or work under the terms of Section 1
above, provided that you also meet all of these conditions:
a) You must cause the modified files to carry prominent notices
stating that you changed the files and the date of any change.
b) You must cause any work that you distribute or publish, that in
whole or in part contains or is derived from the Program or any
part thereof, to be licensed as a whole at no charge to all third
parties under the terms of this License.
c) If the modified program normally reads commands interactively
when run, you must cause it, when started running for such
interactive use in the most ordinary way, to print or display an
announcement including an appropriate copyright notice and a
notice that there is no warranty (or else, saying that you provide
a warranty) and that users may redistribute the program under
these conditions, and telling the user how to view a copy of this
License. (Exception: if the Program itself is interactive but
does not normally print such an announcement, your work based on
the Program is not required to print an announcement.)
These requirements apply to the modified work as a whole. If
identifiable sections of that work are not derived from the Program,
and can be reasonably considered independent and separate works in
themselves, then this License, and its terms, do not apply to those
sections when you distribute them as separate works. But when you
distribute the same sections as part of a whole which is a work based
on the Program, the distribution of the whole must be on the terms of
this License, whose permissions for other licensees extend to the
entire whole, and thus to each and every part regardless of who wrote it.
Thus, it is not the intent of this section to claim rights or contest
your rights to work written entirely by you; rather, the intent is to
exercise the right to control the distribution of derivative or
collective works based on the Program.
In addition, mere aggregation of another work not based on the Program
with the Program (or with a work based on the Program) on a volume of
a storage or distribution medium does not bring the other work under
the scope of this License.
3. You may copy and distribute the Program (or a work based on it,
under Section 2) in object code or executable form under the terms of
Sections 1 and 2 above provided that you also do one of the following:
a) Accompany it with the complete corresponding machine-readable
source code, which must be distributed under the terms of Sections
1 and 2 above on a medium customarily used for software interchange; or,
b) Accompany it with a written offer, valid for at least three
years, to give any third party, for a charge no more than your
cost of physically performing source distribution, a complete
machine-readable copy of the corresponding source code, to be
distributed under the terms of Sections 1 and 2 above on a medium
customarily used for software interchange; or,
c) Accompany it with the information you received as to the offer
to distribute corresponding source code. (This alternative is
allowed only for noncommercial distribution and only if you
received the program in object code or executable form with such
an offer, in accord with Subsection b above.)
The source code for a work means the preferred form of the work for
making modifications to it. For an executable work, complete source
code means all the source code for all modules it contains, plus any
associated interface definition files, plus the scripts used to
control compilation and installation of the executable. However, as a
special exception, the source code distributed need not include
anything that is normally distributed (in either source or binary
form) with the major components (compiler, kernel, and so on) of the
operating system on which the executable runs, unless that component
itself accompanies the executable.
If distribution of executable or object code is made by offering
access to copy from a designated place, then offering equivalent
access to copy the source code from the same place counts as
distribution of the source code, even though third parties are not
compelled to copy the source along with the object code.
4. You may not copy, modify, sublicense, or distribute the Program
except as expressly provided under this License. Any attempt
otherwise to copy, modify, sublicense or distribute the Program is
void, and will automatically terminate your rights under this License.
However, parties who have received copies, or rights, from you under
this License will not have their licenses terminated so long as such
parties remain in full compliance.
5. You are not required to accept this License, since you have not
signed it. However, nothing else grants you permission to modify or
distribute the Program or its derivative works. These actions are
prohibited by law if you do not accept this License. Therefore, by
modifying or distributing the Program (or any work based on the
Program), you indicate your acceptance of this License to do so, and
all its terms and conditions for copying, distributing or modifying
the Program or works based on it.
6. Each time you redistribute the Program (or any work based on the
Program), the recipient automatically receives a license from the
original licensor to copy, distribute or modify the Program subject to
these terms and conditions. You may not impose any further
restrictions on the recipients' exercise of the rights granted herein.
You are not responsible for enforcing compliance by third parties to
this License.
7. If, as a consequence of a court judgment or allegation of patent
infringement or for any other reason (not limited to patent issues),
conditions are imposed on you (whether by court order, agreement or
otherwise) that contradict the conditions of this License, they do not
excuse you from the conditions of this License. If you cannot
distribute so as to satisfy simultaneously your obligations under this
License and any other pertinent obligations, then as a consequence you
may not distribute the Program at all. For example, if a patent
license would not permit royalty-free redistribution of the Program by
all those who receive copies directly or indirectly through you, then
the only way you could satisfy both it and this License would be to
refrain entirely from distribution of the Program.
If any portion of this section is held invalid or unenforceable under
any particular circumstance, the balance of the section is intended to
apply and the section as a whole is intended to apply in other
circumstances.
It is not the purpose of this section to induce you to infringe any
patents or other property right claims or to contest validity of any
such claims; this section has the sole purpose of protecting the
integrity of the free software distribution system, which is
implemented by public license practices. Many people have made
generous contributions to the wide range of software distributed
through that system in reliance on consistent application of that
system; it is up to the author/donor to decide if he or she is willing
to distribute software through any other system and a licensee cannot
impose that choice.
This section is intended to make thoroughly clear what is believed to
be a consequence of the rest of this License.
8. If the distribution and/or use of the Program is restricted in
certain countries either by patents or by copyrighted interfaces, the
original copyright holder who places the Program under this License
may add an explicit geographical distribution limitation excluding
those countries, so that distribution is permitted only in or among
countries not thus excluded. In such case, this License incorporates
the limitation as if written in the body of this License.
9. The Free Software Foundation may publish revised and/or new versions
of the General Public License from time to time. Such new versions will
be similar in spirit to the present version, but may differ in detail to
address new problems or concerns.
Each version is given a distinguishing version number. If the Program
specifies a version number of this License which applies to it and "any
later version", you have the option of following the terms and conditions
either of that version or of any later version published by the Free
Software Foundation. If the Program does not specify a version number of
this License, you may choose any version ever published by the Free Software
Foundation.
10. If you wish to incorporate parts of the Program into other free
programs whose distribution conditions are different, write to the author
to ask for permission. For software which is copyrighted by the Free
Software Foundation, write to the Free Software Foundation; we sometimes
make exceptions for this. Our decision will be guided by the two goals
of preserving the free status of all derivatives of our free software and
of promoting the sharing and reuse of software generally.
NO WARRANTY
11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
REPAIR OR CORRECTION.
12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
POSSIBILITY OF SUCH DAMAGES.
END OF TERMS AND CONDITIONS
swaml-0.1.1/INSTALL 0000644 0001750 0001750 00000000677 11003322041 013223 0 ustar sergio sergio Installing SWAML
****************
Short Answer:
-------------
$ make install
Long Answer:
------------
Dependencies:
- python >= 2.4.0
- python-rdflib >= 2.4.0
- python-xml >= 0.8.4
- python-sparqlwrapper >= 1.1.0
As root user you may run:
$ make install
This command installs SWAML into the standard system directories. A new
command, 'swaml', will then be available.
To uninstall it's also easy:
$ make uninstall
swaml-0.1.1/README 0000644 0001750 0001750 00000003556 11003322041 013051 0 ustar sergio sergio
SWAML - Semantic Web Archive of Mailing Lists
*********************************************
SWAML, pronounced [swæml], is a research project that uses
semantic web technologies to publish mailing list archives
in RDF format, developed at the University of Oviedo (Spain).
SWAML reads a collection of email messages stored in a mailbox
(from a mailing list compatible with RFC 4155) and generates an
RDF description. It is written in Python using SIOC as the main
ontology to represent in RDF a mailing list.
Our homepage is:
http://swaml.berlios.de
Development happens on BerliOS Forge:
http://developer.berlios.de/projects/swaml/
REQUIREMENTS:
-------------
- python >= 2.4.0
- python-rdflib >= 2.4.0
- python-xml >= 0.8.4
- python-sparqlwrapper >= 1.1.0
USAGE
-----
1) Create a configuration file (directory "config" contains some examples)
2) Run SWAML:
a) if it is not installed:
$ python run.py yourconfig.ini
b) if it is installed in your system:
$ swaml yourconfig.ini
For the moment SWAML only supports mailbox format, so if you have a maildir
you will need to transform it:
$ find maildir/ -type f -print | while read f; do formail <$f; done > mbox
FEATURES:
---------
* platform independent (written in python)
* shell mode (to use manually or by cron script)
* compatible with RFC 4155
* serialize to disk in RDF/XML and XHTML+RDFa, using (optional) HTTP content negotiation
* reusability of ontologies already extended, mainly SIOC
* enrichment with FOAF using SWSE/Sindice as source of information
* KML support
* GTK browser (Buxon)
* free software (under GPLv2 or later)
PLANNED FEATURES:
-----------------
* RSS exportation of last messages
* serialize to any RDFdb (sesame for example)
* incremental exportations
swaml-0.1.1/run.py 0000755 0001750 0001750 00000005143 11042375304 013361 0 ustar sergio sergio #!/usr/bin/python
# -*- coding: utf8 -*-
#
# SWAML
# Semantic Web Archive of Mailing Lists
#
# Copyright (C) 2005-2008 Sergio Fernández, Diego Berrueta
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 2, or (at your option) any later
# version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
# for more details.
"""Semantic Web Archive of Mailing Lists run file"""
import sys
sys.path.append('./src')
import os, string
from swaml.ui.consoleui import ConsoleUI
from swaml.common.configuration import Configuration
from swaml.rdf.sioc.mailinglist import MailingList
try:
import rdflib
except:
print 'RDFLib is required'
sys.exit(-1)
class SWAML(ConsoleUI):
"""
Main class of SWAML project
@author: Sergio Fdez
@license: GPL
"""
def parseArgs(self, argv):
"""
Getting params of default input
@param argv: arguments values array
"""
if not self.config.parse(argv):
self.usage()
#self.config.show()
def version(self):
"""
Print program version
"""
print "SWAML 0.1.0", #TODO: __init__.__version__
sys.exit()
def __init__(self, argv, base=None):
"""
Main method
@param argv: values of inline arguments
"""
if (base == None):
path = __file__.split('/')
base = '/'.join(path[:-1]) + '/'
ConsoleUI.__init__(self, 'swaml', base)
self.config = Configuration()
for arg in argv:
if arg == "-h" or arg == "--help":
self.usage()
elif arg == "-v" or arg == "--version":
self.version()
self.config.setAgent('http://swaml.berlios.de/doap#swaml') #TODO: how __init__.__agent__?
self.parseArgs(argv)
if (base == None):
self.list = MailingList(self.config)
else:
self.list = MailingList(self.config, base)
messages = self.list.publish()
print str(messages), 'messages procesed'
if __name__ == '__main__':
try:
SWAML(sys.argv[1:])
except KeyboardInterrupt:
print 'Received Ctrl+C or another break signal. Exiting...'
swaml-0.1.1/ChangeLog 0000644 0001750 0001750 00000002612 11362021743 013750 0 ustar sergio sergio
2010-04-16 0.1.1 - Updated to sioc:UserAccount
- Fixed problem importing XML module
- Many other minor bugs fixed
2008-07-25 0.1.0 - Patched to work with latest versions of RDFLib
- Added XHTML+RDFa+GRDDL support
- Adopted new URIs scheme
- Added content negotiation using apache rules
- Created a new property (swaml:address) to describe the
email address of a mailing list
- Used external services (SWSE and Sindice) to enrich
subscribers information
- Many other small improvements and bug fixes
2006-12-28 0.0.5 - Buxon parted as an independent tool
- Added a wizard to write configuration files
- Other minor changes according to Debian policies
2006-11-21 0.0.4 - Minor bugs fixed
- sioc:Site class added
- New inheritance scheme for classes with any type of UI
- Added make file rules to install
2006-11-01 0.0.3 - Buxon, a sioc:Forum browser written in PyGTK, added
2006-10-13 0.0.2 - New configuration method based in INI files
- New scripts to add FOAF and KML independent support of main
script
2006-10-02 0.0.1 - First release
swaml-0.1.1/__init__.py 0000644 0001750 0001750 00000002317 11362021743 014311 0 ustar sergio sergio # -*- coding: utf-8 -*-
# SWAML
# Semantic Web Archive of Mailing Lists
#
# Copyright (C) 2005-2010 Sergio Fernández
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 2, or (at your option) any later
# version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
# for more details.
# Package metadata for SWAML.
# NOTE(review): the author and contributor strings end with a space, as if an
# e-mail address had been stripped from them - confirm against upstream.
__author__ = 'Sergio Fernández '
__contributors__ = ['Diego Berrueta ',
'Jose Emilio Labra ',
'Iván Frade ',
'Nacho Barrientos ']
# NOTE(review): the years here stop at 2008 while the header comment of this
# file says 2005-2010 - confirm which one is intended.
__copyright__ = 'Copyright 2005-2008, Sergio Fernández'
__license__ = 'GNU General Public License'
# Release number and release date of this version.
__version__ = '0.1.1'
__date__ = "2010-04-16"
# Project home page.
__url__ = 'http://swaml.berlios.de/'
# URI that identifies SWAML as an agent.
__agent__ = 'http://swaml.berlios.de/doap#swaml'
swaml-0.1.1/AUTHORS 0000644 0001750 0001750 00000000473 10665762637 013274 0 ustar sergio sergio
Authors and Maintainers
***********************
Maintainer:
Sergio Fernández
Project Managers:
Diego Berrueta
Jose E. Labra
Collaborators:
Iván Frade
Debian package:
Nacho Barrientos
swaml-0.1.1/.project 0000644 0001750 0001750 00000000547 10650356224 013656 0 ustar sergio sergio
SWAML
org.python.pydev.PyDevBuilder
org.python.pydev.pythonNature
swaml-0.1.1/config/example-absolut-paths.ini 0000644 0001750 0001750 00000000510 10764005725 020365 0 ustar sergio sergio
[SWAML]
title = Example mail list
description = Example description
host = http://example.com/
dir = /var/www/lists/archives/example/
base = http://example.com/lists/archives/example/
mbox = /var/lib/mailman/archives/public/example.mbox
post = YYYY-MMM/post-ID
to = example@lists.example.com
kml = yes
search = swse
foaf = yes
swaml-0.1.1/config/example-relative-paths.ini 0000644 0001750 0001750 00000000416 10764005725 020534 0 ustar sergio sergio
[SWAML]
title = Example mail list
description = Example description
host = http://example.com/
dir = archive/
base = http://example.com/lists/archives/example/
mbox = example.mbox
post = YYYY-MMM/post-ID
to = example@list.example.com
kml = yes
search = swse
foaf = yes
swaml-0.1.1/setup/swaml 0000644 0001750 0001750 00000001757 11042374570 014421 0 ustar sergio sergio #!/usr/bin/python
# -*- coding: utf8 -*-
#
# SWAML
# Semantic Web Archive of Mailing Lists
#
# This is just a wrapper script for the SWAML main Python program.
#
# Copyright (C) 2006-2008 Sergio Fernández
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 2, or (at your option) any later
# version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
# for more details.
import sys
SWAML_PATH = '/usr/share/swaml/'
sys.path.append(SWAML_PATH + "src/")
sys.path.append(SWAML_PATH)
try:
from run import SWAML
sys.exit(SWAML(sys.argv[1:], SWAML_PATH))
except ImportError:
print 'SWAML cannot be found; please, ensure that it is installed correctly.'
sys.exit(1)
swaml-0.1.1/manpages/swaml.1 0000644 0001750 0001750 00000002226 11361620646 015205 0 ustar sergio sergio .TH SWAML "1" "December 2006" "swaml" "User Commands"
.SH NAME
\fBSWAML\fP \- Semantic Web Archive of Mailing Lists
.SH SYNOPSIS
\fBswaml\fP \fIconfigfile\fP [options]
.SH DESCRIPTION
\fBSWAML\fP transforms the archives of a mailing list (in mbox format)
into a semantic web friendly format (RDF in XML).
.PP
It reads a collection of email messages stored in a mailbox (from a
mailing list compatible with RFC 4155) and generates an RDF description.
It is written in Python using SIOC as the main ontology to represent
in RDF a mailing list.
.PP
.SH OPTIONS
.TP
.B
\fB\-h\fR, \fB\-\-help\fR
Print help text and exit.
.TP
.B
\fB\-v, --version\fP
Print version and exit.
.SH EXAMPLES
On directory /usr/share/doc/swaml/examples you should find some
configuration file examples.
.SH AUTHOR
Manpage written by Nacho Barrientos and
Sergio Fernandez for the Debian GNU/Linux system (but
may be used by others).
.SH BUGS
Report bugs to Debian BTS or to http://swaml.berlios.de/bugs.
.SH COPYRIGHT
Copyright \(co 2006, Sergio Fernandez. Licensed under GPLv2 license.
.PP
.nf
.fam C
http://swaml.berlios.de/
.SH "SEE ALSO"
.BR buxon (1)
swaml-0.1.1/includes/apache/htaccess-files.tpl 0000644 0001750 0001750 00000001350 11003373640 020637 0 ustar sergio sergio # SWAML htaccess rules
RewriteEngine On
RewriteBase {BASE}
AddType application/rdf+xml .rdf
#Options -MultiViews
# Rewrite rule to serve forum instance
RewriteRule ^forum$ forum.rdf [R=303]
# Rewrite rule to serve subscriber instance
RewriteRule ^subscribers/(.*)$ subscribers.rdf [R=301]
RewriteRule ^subscriber$ subscribers.rdf [R=303]
# Rewrite rule to serve HTML content with a post instance
RewriteCond %{HTTP_ACCEPT} text/html [OR]
RewriteCond %{HTTP_ACCEPT} application/xhtml\+xml [OR]
RewriteCond %{HTTP_USER_AGENT} ^Mozilla/.*
RewriteRule ^{POSTURI}$ {POSTFILE}.html [R=303]
# Rewrite rule to serve RDF/XML content with a post instance
RewriteCond %{HTTP_ACCEPT} application/rdf\+xml
RewriteRule ^{POSTURI}$ {POSTFILE}.rdf [R=303]
swaml-0.1.1/includes/apache/htaccess-endpoint.tpl 0000644 0001750 0001750 00000000557 10643422571 021374 0 ustar sergio sergio # SWAML htaccess rules
# FIXME: customize base URI
# FIXME: dynamic customize of last rule
RewriteEngine On
Options -MultiViews
RewriteRule ^(.*) http://wopr:8180/openrdf-http-server-2.0-beta5/repositories/prueba/?query=CONSTRUCT+{+?y+?z}+WHERE+{+?y+?z}&queryLn=sparql [R=303]
swaml-0.1.1/src/swaml/__init__.py 0000644 0001750 0001750 00000001160 10644225305 016220 0 ustar sergio sergio # -*- coding: utf8 -*-
# SWAML
# Semantic Web Archive of Mailing Lists
#
# Copyright (C) 2005-2007 Sergio Fernández
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 2, or (at your option) any later
# version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
# for more details.
swaml-0.1.1/test/swaml/__init__.py 0000644 0001750 0001750 00000001161 10774155402 016415 0 ustar sergio sergio # -*- coding: utf8 -*-
# SWAML
# Semantic Web Archive of Mailing Lists
#
# Copyright (C) 2005-2008 Sergio Fernández
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 2, or (at your option) any later
# version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
# for more details.
swaml-0.1.1/includes/ui/web/swaml.css 0000644 0001750 0001750 00000000500 10644235662 017036 0 ustar sergio sergio
/* SWAML style template */
/* Page defaults: black text on a light grey background, sans-serif fonts,
   no outer margin or padding */
html, body {
color: #000000;
background: #cccccc;
font-family: Helvetica, Verdana, Arial, sans-serif;
margin: 0;
padding: 0;
}
/* Inner spacing for every div */
div {
padding: 2em;
}
/* Preformatted text is shown in a padded white box */
pre {
background-color: #ffffff;
padding: 1.5em;
}
/* Credits paragraph: small, right-aligned text */
p.credits {
text-align: right;
font-size: 0.8em;
padding: 2em;
}
swaml-0.1.1/src/swaml/ui/consoleui.py 0000644 0001750 0001750 00000002623 10644225305 017103 0 ustar sergio sergio # -*- coding: utf8 -*-
# SWAML
# Semantic Web Archive of Mailing Lists
#
# Copyright (C) 2005-2007 Sergio Fernández
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 2, or (at your option) any later
# version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
# for more details.
"""Common console UI"""
import sys, os, string
from swaml.ui.ui import UI
class ConsoleUI(UI):
"""
Abstract class for Text-mode User Interfaces
"""
def usage(self):
"""
Print usage information
"""
path = self.base + 'usage/' + self.id + '.txt'
try:
for line in open(path):
print line,
except IOError, details:
print 'Problem reading from ' + path + ': ' + str(details)
sys.exit()
def __init__(self, id=None, base='./'):
"""
Constructor method
@param id: string id
@param base: base directory
"""
UI.__init__(self, id, base+'includes/ui/text/')
swaml-0.1.1/src/swaml/ui/__init__.py 0000644 0001750 0001750 00000001160 10644225305 016635 0 ustar sergio sergio # -*- coding: utf8 -*-
# SWAML
# Semantic Web Archive of Mailing Lists
#
# Copyright (C) 2005-2007 Sergio Fernández
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 2, or (at your option) any later
# version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
# for more details.
swaml-0.1.1/src/swaml/ui/ui.py 0000644 0001750 0001750 00000002067 10644225305 015522 0 ustar sergio sergio # -*- coding: utf8 -*-
# SWAML
# Semantic Web Archive of Mailing Lists
#
# Copyright (C) 2005-2007 Sergio Fernández
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 2, or (at your option) any later
# version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
# for more details.
"""Common functions for UIs"""
import sys, os, string
class UI:
    """
    Base class shared by the user interfaces
    """

    def __init__(self, id=None, base='./'):
        """
        Store the identity of the interface

        @param id: string id
        @param base: base directory
        """
        self.base = base
        self.id = id

    def usage(self):
        """
        Print usage information (nothing to print at this level)
        """
        return None
swaml-0.1.1/src/swaml/rdf/kml.py 0000644 0001750 0001750 00000011431 11361630250 016015 0 ustar sergio sergio # -*- coding: utf8 -*-
# SWAML
# Semantic Web Archive of Mailing Lists
#
# Copyright (C) 2005-2007 Sergio Fernández
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 2, or (at your option) any later
# version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
# for more details.
"""Google KML basic support"""
import sys, os, string
import xml.dom.minidom
from xml.dom.minidom import getDOMImplementation
class KML:
    """
    KML format support
    """

    def __init__(self):
        """
        KML document constructor
        """
        self.places = []
        self.ns = 'http://earth.google.com/kml/2.0'

    def addPlace(self, lat, lon, name=None, description=None):
        """
        Add a new placemark

        @param lat: latitude
        @param lon: longitude
        @param name: place name
        @param description: place description
        """
        self.places.append(Place(lat, lon, name, description))

    def write(self, file):
        """
        Serialize into KML 2.0 format

        One Placemark element is written per stored place, holding an
        optional name, an optional description, a LookAt element and a
        Point element.

        @param file: file object; receives the UTF-8 encoded document
        """

        #root nodes
        doc = getDOMImplementation().createDocument(None, "kml", None)
        root = doc.documentElement
        root.setAttribute('xmlns', self.ns)

        #and placesmarks
        for place in self.places:
            placemark = doc.createElement('Placemark')
            root.appendChild(placemark)

            #information nodes
            name_text = place.getName()
            if (name_text is not None):
                name = doc.createElement('name')
                name.appendChild(doc.createTextNode(name_text))
                placemark.appendChild(name)

            pic = place.getDescription()
            if (pic is not None):
                description = doc.createElement('description')
                # NOTE(review): this literal reached us garbled (its markup
                # was stripped, leaving an unterminated string). It is
                # rebuilt here as an image tag around the description value;
                # confirm against the upstream kml.py.
                desc = '<img src="' + pic + '" />'
                description.appendChild(doc.createTextNode(desc))
                placemark.appendChild(description)

            #look at node
            lookAt = doc.createElement('LookAt')
            placemark.appendChild(lookAt)

            #coordinates
            latitude, longitude = place.getCoordinates()
            lat = doc.createElement('latitude')
            lat.appendChild(doc.createTextNode(str(latitude)))
            lookAt.appendChild(lat)
            lon = doc.createElement('longitude')
            lon.appendChild(doc.createTextNode(str(longitude)))
            lookAt.appendChild(lon)

            #TODO: read the KML specification to learn what the optional
            #      LookAt children (range, tilt, heading) are for

            #point: the text is written as longitude,latitude,0
            point = doc.createElement('Point')
            coordinates = doc.createElement('coordinates')
            coordinates.appendChild(doc.createTextNode(str(longitude) + ',' + str(latitude) + ',0'))
            point.appendChild(coordinates)
            placemark.appendChild(point)

        #and dump it in pretty xml format
        file.write(doc.toprettyxml(encoding="utf-8"))
class Place:
    """
    A single placemark handled by the KML writer
    """

    def __init__(self, lat, lon, name=None, description=None):
        """
        Build a placemark

        @param lat: latitude
        @param lon: longitude
        @param name: place name
        @param description: place description
        """
        self.lat, self.lon = lat, lon
        self.name, self.description = name, description

    def getCoordinates(self):
        """
        Coordinates of the placemark

        @return: two-item list, latitude first and longitude second
        """
        pair = [self.lat, self.lon]
        return pair

    def getName(self):
        """
        Name of the placemark

        @return: place name
        """
        return self.name

    def getDescription(self):
        """
        Description of the placemark

        @return: place description
        """
        return self.description
swaml-0.1.1/src/swaml/rdf/swse.py 0000644 0001750 0001750 00000003722 11003365562 016223 0 ustar sergio sergio # -*- coding: utf8 -*-
# SWAML
# Semantic Web Archive of Mailing Lists
#
# Copyright (C) 2008 Sergio Fernández
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 2, or (at your option) any later
# version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
# for more details.
"""
A simple python wrapper for SWSE
seeAlso: http://swse.deri.org/
"""
from SPARQLWrapper import SPARQLWrapper, JSON
class SWSE:
    """
    Thin wrapper to send SPARQL queries to the SWSE endpoint
    """

    def __init__(self):
        """
        SWSE constructor
        """
        self.service = "http://swse.deri.org/yars2/query"

    def query(self, query):
        """
        SWSE Query

        Rows binding a single variable are flattened to that variable's
        value; rows binding several variables become a dictionary from
        variable name to value. Any failure while calling the service is
        swallowed and whatever was collected so far is returned.

        @param query: sparql query
        @return: results
        """

        queryResults = []

        try:
            endpoint = SPARQLWrapper(self.service)
            #endpoint = SPARQLWrapper(self.service, agent="swaml (http://swaml.berlios.de/; sergio@wikier.org)")
            endpoint.setQuery(query)
            endpoint.setReturnFormat(JSON)
            answer = endpoint.query().convert()

            if answer.has_key("results"):
                for binding in answer["results"]["bindings"]:
                    names = binding.keys()
                    if (len(names) == 1):
                        queryResults.append(binding[names[0]]['value'])
                    else:
                        row = {}
                        for name in names:
                            row[name] = binding[name]['value']
                        queryResults.append(row)
        except Exception:
            #print "Exception calling SWSE" #FIXME
            pass

        return queryResults
swaml-0.1.1/src/swaml/rdf/namespaces.py 0000644 0001750 0001750 00000003111 11361624723 017355 0 ustar sergio sergio # -*- coding: utf8 -*-
# SWAML
# Semantic Web Archive of Mailing Lists
#
# Copyright (C) 2005-2008 Sergio Fernández, Diego Berrueta
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 2, or (at your option) any later
# version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
# for more details.
"""Common namespaces"""
from rdflib import Namespace
# Namespace objects for the vocabularies used by this package.
RDF = Namespace(u"http://www.w3.org/1999/02/22-rdf-syntax-ns#")
RDFS = Namespace(u"http://www.w3.org/2000/01/rdf-schema#")
SWAML = Namespace(u"http://swaml.berlios.de/ns/0.3#")
SIOC = Namespace(u"http://rdfs.org/sioc/ns#")
SIOCT = Namespace(u"http://rdfs.org/sioc/types#")
# NOTE(review): DC and DCT point at the same URI (dc/terms/), so the "dc" and
# "dct" prefixes below are aliases - confirm that DC is not meant to be the
# Dublin Core elements namespace.
DC = Namespace(u"http://purl.org/dc/terms/")
DCT = Namespace(u"http://purl.org/dc/terms/")
FOAF = Namespace(u"http://xmlns.com/foaf/0.1/")
GEO = Namespace(u"http://www.w3.org/2003/01/geo/wgs84_pos#")
MVCB = Namespace(u"http://webns.net/mvcb/")
ICAL = Namespace(u"http://www.w3.org/2002/12/cal/icaltzd#")
XSD = Namespace(u"http://www.w3.org/2001/XMLSchema#")
# Prefix -> namespace table; foaf.py passes it as initNs when running
# SPARQL queries over a graph.
NSbindings = {
u"rdf" : RDF,
u"rdfs" : RDFS,
u"swaml" : SWAML,
u"sioc" : SIOC,
u"sioct" : SIOCT,
u"dc" : DC,
u"dct" : DCT,
u"foaf" : FOAF,
u"geo" : GEO,
u"mvcb" : MVCB,
u"ical" : ICAL,
u"xsd" : XSD
}
swaml-0.1.1/src/swaml/rdf/__init__.py 0000644 0001750 0001750 00000001160 10644225305 016773 0 ustar sergio sergio # -*- coding: utf8 -*-
# SWAML
# Semantic Web Archive of Mailing Lists
#
# Copyright (C) 2005-2007 Sergio Fernández
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 2, or (at your option) any later
# version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
# for more details.
swaml-0.1.1/src/swaml/rdf/sindice.py 0000644 0001750 0001750 00000006665 11044264526 016674 0 ustar sergio sergio # -*- coding: utf8 -*-
# SWAML
# Semantic Web Archive of Mailing Lists
#
# Copyright (C) 2008 Sergio Fernández
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 2, or (at your option) any later
# version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
# for more details.
"""
A python client for Sindice.com
See also: http://sindice.com/developers/api
"""
import urllib
import urllib2
import simplejson
import warnings
class Sindice:
    """
    Small client for the Sindice.com lookup (v1) and search (v2) services
    """

    def __init__(self):
        """
        Sindice constructor
        """
        self.service1 = "http://sindice.com/query/v1/lookup?%s&format=txt"
        self.service2 = "http://api.sindice.com/v2/search?q=%s&qt=%s"

    def __request(self, uri, accept="application/json"):
        """
        Generic request

        @param uri: uri to request
        @param accept: value sent in the Accept header
        @return: response
        @rtype: file-like object
        """
        headers = {
            "User-Agent" : "swaml (http://swaml.berlios.de/; sergio@wikier.org)",
            "Accept" : accept
        }
        request = urllib2.Request(uri, headers=headers)
        return urllib2.urlopen(request)

    def lookupURIs(self, uri):
        """
        Lookup URIs (not implemented: only warns and prints a marker)

        @param uri: uri to query
        @return: results (always empty)
        @rtype: list
        """
        warnings.warn("This method is deprecated becuase it uses the old Sindice's API", DeprecationWarning, stacklevel=3)
        print("TODO")
        return []

    def lookupKeywords(self, keyword):
        """
        Lookup keywords (not implemented: only warns and prints a marker)

        @param keyword: keyword to query
        @return: results (always empty)
        @rtype: list
        """
        warnings.warn("This method is deprecated becuase it uses the old Sindice's API", DeprecationWarning, stacklevel=3)
        print("TODO")
        return []

    def lookupIFPs(self, property, object):
        """
        Lookup IFPs

        The response is read as tab separated text; the first two columns
        of every line are returned.

        @param property: property to query
        @param object: object
        @return: results, one (first column, second column) tuple per line
        @rtype: list
        """
        warnings.warn("This method is deprecated becuase it uses the old Sindice's API", DeprecationWarning, stacklevel=3)
        # escape both values so that reserved characters ('&', '#', ...)
        # cannot break or truncate the query string
        query = "property=%s&object=%s" % (urllib.quote(property, ''),
                                           urllib.quote(object, ''))
        uri = self.service1 % query
        response = self.__request(uri, accept="text/plain")
        results = []
        try:
            for line in response:
                # drop the line terminator, otherwise it ends up glued to
                # the last column
                fields = line.rstrip("\r\n").split("\t")
                if len(fields) < 2:
                    # blank or malformed line: nothing usable in it
                    continue
                results.append((fields[0], fields[1]))
        finally:
            response.close()
        return results

    def query(self, query, qt="term"):
        """
        An advanced query

        @param query: text of the query
        @param qt: query type sent to the service
        @return: the distinct 'link' values of the returned entries, in
                 order of first appearance
        @rtype: list
        """
        uri = self.service2 % (urllib.quote(query), urllib.quote(qt))
        response = self.__request(uri)
        try:
            json = simplejson.load(response)
        finally:
            response.close()
        results = []
        for entry in json["entries"]:
            link = entry["link"]
            if not link in results:
                results.append(link)
        return results

    def sparql(self, query):
        """
        A SPARQL translator (not implemented: only prints a marker)

        @param query: sparql query
        @return: results (always empty)
        @rtype: list
        """
        print("TODO")
        return []
swaml-0.1.1/src/swaml/rdf/foaf.py 0000644 0001750 0001750 00000027074 11043541075 016162 0 ustar sergio sergio # -*- coding: utf8 -*-
# SWAML
# Semantic Web Archive of Mailing Lists
#
# Copyright (C) 2005-2008 Sergio Fernández
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 2, or (at your option) any later
# version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
# for more details.
"""Util services to work with FOAF"""
import sys, os, string, sha
from rdflib.Graph import ConjunctiveGraph
from rdflib.sparql.sparqlGraph import SPARQLGraph
from rdflib.sparql.graphPattern import GraphPattern
from rdflib.sparql import Query
from rdflib.sparql.bison import Parse
from rdflib import Namespace, Literal
from swaml.rdf.namespaces import SIOC, RDF, RDFS, FOAF, GEO, NSbindings
from swaml.rdf.sindice import Sindice
from swaml.rdf.swse import SWSE
from email.Header import decode_header
class FOAFS:
"""
Collection of util services to SWAML
"""
def __init__(self, config=None):
"""
FOAF services constructor
"""
self.__actualDoc = None
self.__graph = None
self.config = config
def getFoaf(self, mail):
    """
    Look up the FOAF data published for an email address

    The address is first turned into its sha1sum and the lookup is then
    delegated to getFoafFromSha.

    @param mail: an email address
    @type mail: string
    @return: whatever getFoafFromSha returns for the hashed address
    @rtype: tuple
    """
    return self.getFoafFromSha(self.getShaMail(mail))
def getFoafFromSha(self, mail_sha1sum):
    """
    Look up FOAF data from an email sha1sum through an external service

    Sindice is used when a configuration is present and its 'search'
    option reads 'sindice' (any letter case); SWSE is used otherwise.

    @param mail_sha1sum: an email address sha1sum
    @type mail_sha1sum: string
    @return: result of the selected lookup method
    @rtype: tuple
    """
    useSindice = (self.config != None
                  and self.config.get('search').lower() == 'sindice')
    if useSindice:
        return self.getFoafWithSindice(mail_sha1sum)
    return self.getFoafWithSWSE(mail_sha1sum)
def getFoafWithSindice(self, mail_sha1sum):
    """
    Look up FOAF data from an email sha1sum through sindice.com

    Sindice is asked for the resources whose foaf:mbox_sha1sum equals the
    given value and the candidates are handed to __getBestURI.

    @param mail_sha1sum: an email address sha1sum
    @type mail_sha1sum: string
    @return: result of __getBestURI for the candidates found
    @rtype: tuple
    """
    candidates = Sindice().lookupIFPs("http://xmlns.com/foaf/0.1/mbox_sha1sum", mail_sha1sum)
    return self.__getBestURI(candidates)
def getFoafWithSWSE(self, mail_sha1sum):
"""
Obtain FOAF URI from an email sha1sum, provided by swse.deri.org
@param mail_sha1sum: an email address sha1sum
@type mail_sha1sum: string
@return: the document and his FOAF URI of this coded email owner
@rtype: tuple
"""
query = """
PREFIX rdf:
PREFIX foaf:
SELECT DISTINCT ?file ?person
WHERE {
?file foaf:primaryTopic ?person .
?person rdf:type foaf:Person .
?person foaf:mbox_sha1sum "%s"
}
"""
swse = SWSE()
results = swse.query(query % mail_sha1sum)
if len(results) > 0:
i = 0
while ((i
PREFIX rdfs:
PREFIX foaf:
SELECT DISTINCT ?file ?person
WHERE {
?person rdf:type foaf:Person .
?person foaf:mbox_sha1sum "%s" .
?person rdfs:seeAlso ?file
}
"""
results2 = swse.query(query2 % mail_sha1sum)
if len(results2) > 0:
i = 0
while ((i foaf:primaryTopic ?person .
# ?person rdf:type foaf:Person .
# ?person foaf:mbox_sha1sum "%s"@en
# }
# """ % (doc, mbox) )
query = Parse("""
SELECT ?person
WHERE {
<%s> foaf:primaryTopic ?person .
?person rdf:type foaf:Person .
}
""" % doc )
queryResults = g.query(query, initNs=NSbindings).serialize('python')
if len(queryResults) > 0 :
return (doc, queryResults[0])
except Exception, details:
print details
if (len(possibilities)>0):
return (possibilities[0][0], None)
else:
return (None, None)
def __getGraph(self, doc):
    """
    A simple mechanism to cache the graph of the last document loaded

    The document is parsed again only when it differs from the previous
    one or when the previous attempt left no graph.

    @param doc: uri of the document to load
    @return: the graph with the document loaded, or None if it could not
             be parsed
    @rtype: rdflib.Graph.ConjunctiveGraph
    """

    #tip to set socket timeout global var
    import socket
    socket.setdefaulttimeout(10) #timeout in seconds

    if (self.__actualDoc != doc or self.__graph is None):
        self.__actualDoc = doc
        graph = ConjunctiveGraph()
        try:
            graph.parse(doc)
        except Exception:
            # unreachable or malformed document: report "no graph".
            # Only Exception is caught so that Ctrl+C / SystemExit raised
            # during a slow download are not swallowed.
            graph = None
        self.__graph = graph

    return self.__graph
def getGeoPosition(self, foaf, doc, sha1mail):
    """
    Obtain geographic information from a FOAF document

    Two queries are tried in turn over the graph of the document: the
    first looks for the point the given person is based near, the second
    for the point of any person carrying the given mbox sha1sum. The
    first query that yields rows wins.

    @param foaf: person uri
    @param doc: document that contains that person
    @param sha1mail: encoded mail address
    @return: (latitude, longitude) tuple, or (None, None) when nothing
             was found
    """

    if doc == None:
        return (None, None)

    graph = self.__getGraph(doc)
    if (graph != None):
        # (query template, arguments) pairs; a template is only filled in
        # when its turn comes
        attempts = (
            ("""
SELECT ?lat ?lon
WHERE {
<%s> rdf:type foaf:Person .
<%s> foaf:based_near ?point .
?point rdf:type geo:Point .
?point geo:lat ?lat .
?point geo:long ?lon
}
""", (foaf, foaf)),
            ("""
SELECT ?lat ?lon
WHERE {
?person rdf:type foaf:Person .
?person foaf:mbox_sha1sum "%s"@en .
?person foaf:based_near ?point .
?point rdf:type geo:Point .
?point geo:lat ?lat .
?point geo:long ?lon .
}
""", sha1mail),
        )
        for template, arguments in attempts:
            sparql = template % arguments
            rows = graph.query(Parse(sparql), initNs=NSbindings).serialize('python')
            if len(rows) > 0 :
                return (rows[0][0], rows[0][1])

    return (None, None)
def getPic(self, foaf, sha1mail):
    """
    Get the picture of a person from a FOAF document

    @param foaf: a foaf uri, loaded as the document to search
    @param sha1mail: encoded mail address identifying the person
    @return: first foaf:depiction found for that person, or None
    """

    graph = self.__getGraph(foaf)

    if (graph != None):
        pattern = GraphPattern([ ('?x', RDF['type'], FOAF['Person']),
                                 ('?x', FOAF['mbox_sha1sum'], sha1mail),
                                 ('?x', FOAF['depiction'], '?pic')
                               ])
        matches = Query.query(SPARQLGraph(graph), ('?pic'), pattern)
        # only the first match is of interest
        for match in matches:
            return match

    return None
def getHomepage(self, foaf, sha1mail):
    """
    Get the homepage of a person from a FOAF document

    @param foaf: a foaf uri, loaded as the document to search
    @param sha1mail: encoded mail address identifying the person
    @return: first foaf:homepage found for that person, or None
    """

    graph = self.__getGraph(foaf)

    if (graph != None):
        pattern = GraphPattern([ ('?x', RDF['type'], FOAF['Person']),
                                 ('?x', FOAF['mbox_sha1sum'], sha1mail),
                                 ('?x', FOAF['homepage'], '?homepage')
                               ])
        matches = Query.query(SPARQLGraph(graph), ('?homepage'), pattern)
        # only the first match is of interest
        for match in matches:
            return match

    return None
def getShaMail(self, mail):
    """
    Compute the foaf:mbox_sha1sum value of an email address

    The address is lower-cased, prefixed with 'mailto:' and hashed with
    SHA-1.

    @param mail: an email address
    @type mail: string
    @return: hexadecimal sha1 digest of the mailto uri
    @rtype: string
    """

    # hashlib replaces the deprecated sha module; the fallback keeps very
    # old interpreters working
    try:
        from hashlib import sha1
    except ImportError:
        from sha import new as sha1

    mail = mail.lower() # I'm no sure if it's a good idea...
    uri = 'mailto:' + mail
    if isinstance(uri, type(u'')):
        # text (not bytes) has to be encoded before hashing; UTF-8 also
        # copes with non-ASCII addresses
        uri = uri.encode('utf-8')
    return sha1(uri).hexdigest()
swaml-0.1.1/src/swaml/common/configuration.py 0000644 0001750 0001750 00000007441 10764005725 020635 0 ustar sergio sergio # -*- coding: utf8 -*-
# SWAML
# Semantic Web Archive of Mailing Lists
#
# Copyright (C) 2005-2008 Sergio Fernández
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 2, or (at your option) any later
# version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
# for more details.
"""Configuration related code"""
import string
from ConfigParser import ConfigParser
class Configuration:
    """Class to encapsulate SWAML's configuration"""

    def __init__(self):
        """
        Constructor method: loads the default value of every option
        """

        #default values
        self.config = {
            'title' : '',
            'description' : '',
            'host' : '',
            'verbose' : False,
            'dir' : 'archive/',
            'base' : 'http://localhost/swaml/',
            'mbox' : 'mbox',
            'post' : 'YYYY-MMM/messageID',
            'to' : 'foo@bar.com',
            'kml' : True,
            'search': 'swse',
            'foaf' : True
        }

        self.antispam = ' AT '
        # defined up front so that getAgent() works before setAgent()
        self.agent = None

    def parse(self, argv):
        """
        Getting params of default input

        Reads the 'SWAML' section of the config file named by the first
        argument and stores each of its options.

        @param argv: arguments values array; argv[0] is the config path
        @return: parse ok
        @rtype: boolean
        @todo: process one o more lists
        """

        if (len(argv) == 0):
            return False

        path = argv[0]
        config = ConfigParser()

        try:
            config.read(path)
        except Exception:
            # a malformed file is reported here; the missing section is
            # then detected just below
            print('Error parsing config file')

        section = 'SWAML'

        if not config.has_section(section):
            print('No SWAML section founded')
            return False

        for option in config.options(section):
            if not self.set(option, config.get(section, option)):
                print('unknow option in ' + path)
                return False

        return True

    def getAntiSpam(self):
        """
        String to fight against the SPAM
        """
        return self.antispam

    def get(self, var):
        """
        Method to get a configuration property

        @param var: var key
        @return: the stored value, or None for an unknown key
        """
        if (var in self.config):
            return self.config[var]
        return None

    def getAgent(self):
        """
        Return the agent URL

        @return: value stored by setAgent, or None if it was never called
        """
        return self.agent

    def set(self, var, value):
        """
        Method to set a configuration property

        'dir' and 'base' get a trailing slash when they lack one; 'kml'
        and 'foaf' are turned into booleans ('no', in any letter case,
        means False and anything else True).

        @param var: var key
        @param value: value var
        @return: True if the key is known and was stored, False otherwise
        """
        # NOTE(review): 'verbose' defaults to a boolean but is stored as
        # given, so a config file value such as 'no' stays a non-empty
        # string - confirm how callers test it.

        if (var not in self.config):
            return False

        #two litle exceptions in var format
        if (var == 'dir' or var == 'base'):
            # an empty value is left alone instead of indexing into it
            if (value and not value.endswith('/')):
                value += '/'
        elif (var == 'kml' or var == 'foaf'):
            # values that are already booleans are kept as they are
            if not isinstance(value, bool):
                value = (value.lower() != 'no')

        self.config[var] = value
        return True

    def setAgent(self, agent):
        """
        Store the agent's url

        @param agent: agent uri
        """
        self.agent = agent

    def show(self):
        """
        Show all configure options
        """
        for var in self.config.keys():
            print(var + ': ' + str(self.config[var]))
swaml-0.1.1/src/swaml/common/configWizard.py 0000755 0001750 0001750 00000005615 10764217350 020417 0 ustar sergio sergio #!/usr/bin/python
# -*- coding: utf8 -*-
#
# SWAML
# Semantic Web Archive of Mailing Lists
#
# Copyright (C) 2005-2008 Sergio Fernández
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 2, or (at your option) any later
# version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
# for more details.
"""Wizard to create config files for SWAML"""
import sys, os, string
from swaml.ui.ui import ConsoleUI
from swaml.common.configuration import Configuration
import ConfigParser
class ConfigWizard(ConsoleUI):
"""
SWAML's config wizard
@author: Sergio Fdez
@license: GPL
"""
def requestData(self):
"""
Queries the user a new configuration
"""
self.config = Configuration()
print 'Write your configuration options:'
print '(default value goes between [...])'
for var in self.config.config.keys():
defaultValue = str(self.config.config[var])
value = raw_input('\t - ' + var + '[' + defaultValue + ']: ')
if (len(value) > 0):
self.config.set(var, value)
def printData(self):
"""
Dump on hard disk the configuration
"""
ini = ConfigParser.ConfigParser()
ini.add_section(self.section)
for var in self.config.config.keys():
ini.set(self.section, var, str(self.config.config[var]))
try:
file = open(self.output, 'w+')
ini.write(file)
file.flush()
file.close()
print 'new config file created in', self.output, 'with chosen parameters'
except IOError, detail:
print 'Error exporting coordinates config file: ' + str(detail)
def wizard(self):
"""
Executes all the wizard functions
"""
self.requestData()
self.printData()
def __init__(self, argv):
"""
main method
@param argv: values of inline arguments
"""
ConsoleUI.__init__(self, 'configWizard')
self.section = 'SWAML'
for arg in argv:
if arg == "-h" or arg == "--help":
self.usage()
if (len(argv)>=1):
self.output = argv[0]
self.wizard()
else:
self.usage()
if __name__ == '__main__':
try:
ConfigWizard(sys.argv[1:])
except KeyboardInterrupt:
print 'Received Ctrl+C or another break signal. Exiting...'
swaml-0.1.1/src/swaml/common/__init__.py 0000644 0001750 0001750 00000001160 10644225305 017510 0 ustar sergio sergio # -*- coding: utf8 -*-
# SWAML
# Semantic Web Archive of Mailing Lists
#
# Copyright (C) 2005-2007 Sergio Fernández
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 2, or (at your option) any later
# version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
# for more details.
swaml-0.1.1/src/swaml/common/date.py 0000644 0001750 0001750 00000010055 10644225305 016671 0 ustar sergio sergio # -*- coding: utf8 -*-
# SWAML
# Semantic Web Archive of Mailing Lists
#
# Copyright (C) 2005-2007 Sergio Fernández
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 2, or (at your option) any later
# version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
# for more details.
"""Utils functions to work with dates"""
import sys, os, string
import email.Utils
import time
class Date:
    """
    Read-only view over a date given as a sequence whose first six items
    are year, month, day, hour, minute and second
    """

    def __init__(self, date):
        """
        Date constructor

        @param date: sequence of date fields, kept as given
        """
        self.date = date

    def _twoDigits(self, number):
        """
        Text form of a number, with a leading zero below ten
        """
        text = str(number)
        if (number < 10):
            return '0' + text
        return text

    def getYear(self):
        """
        Get year value
        """
        return self.date[0]

    def getMonth(self):
        """
        Get month value
        """
        return self.date[1]

    def getDay(self):
        """
        Get day value
        """
        return self.date[2]

    def getStringYear(self):
        """
        Get year string
        """
        return str(self.getYear())

    def getStringMonth(self):
        """
        Get month in string number format (two digits)
        """
        return self._twoDigits(self.getMonth())

    def getStringDay(self):
        """
        Get day as string (two digits)
        """
        return self._twoDigits(self.getDay())

    def getShortStringMonth(self):
        """
        Get month in short string format
        """
        names = ('Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec')
        position = self.getMonth() - 1
        return names[position]

    def getLongStringMonth(self):
        """
        Get month in long string format
        """
        names = ('January', 'February', 'March', 'April',
                 'May', 'June', 'July', 'August',
                 'September', 'October', 'November', 'December')
        position = self.getMonth() - 1
        return names[position]

    def getNumericFormat(self):
        """
        Get int values

        @return: list with year, month and day
        """
        return [self.getYear(), self.getMonth(), self.getDay()]

    def getInteger(self):
        """
        Get long int value

        @return: the first six fields packed two decimal digits each
                 (year first), i.e. a number shaped like YYYYMMDDhhmmss
        """
        total = 0
        for position in range(6):
            # shifting by two decimal digits makes room for the next field
            total = total * 100 + self.date[position]
        return total

    def getStringFormat(self, format='iso'):
        """
        Get string format

        @param format: 'normal' gives day-month-year; anything else gives
                       the iso order year-month-day
        """
        pieces = [self.getStringYear(), self.getStringMonth(), self.getStringDay()]
        if (format == 'normal'):
            #normal format: day-month-year
            pieces.reverse()
        return '-'.join(pieces)
class MailDate(Date):
    """
    Utils functions for date of emails
    """

    def __init__(self, date):
        """
        MailDate constructor

        The text is first parsed as an email date. When that fails a
        message is printed and the dd.mm.yyyy form is tried; if that fails
        too the date falls back to 1970-01-01.

        @param date: date text, as found in a mail header
        """
        self.date = email.Utils.parsedate(date)

        if (self.date is None):
            print('Error parsing date: non valid format (' + date + ')')
            #trying another format: dd.mm.yyyy
            try:
                tmp = date.split('.')
                self.date = (int(tmp[2]), int(tmp[1]), int(tmp[0]),
                             0, 0, 0, 0, 1, -1)
            except (ValueError, IndexError):
                # too few parts or non numeric parts; only these two errors
                # are expected, so nothing else is silenced
                self.date = (1970, 1, 1, 0, 0, 0, 0, 1, -1)
class FileDate(Date):
    """
    Date taken from a file on disk
    """

    def __init__(self, path):
        """
        FileDate constructor

        @param path: path of the file whose timestamp is read
        """
        # item 8 of the stat result, converted to local time
        stamp = os.stat(path)[8]
        self.date = time.localtime(stamp)
swaml-0.1.1/src/swaml/common/charset.py 0000644 0001750 0001750 00000005440 11003406665 017407 0 ustar sergio sergio # -*- coding: utf8 -*-
# SWAML
# Semantic Web Archive of Mailing Lists
#
# Copyright (C) 2005-2007 Sergio Fernández
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 2, or (at your option) any later
# version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
# for more details.
"""Util function to work with charsets"""
import sys, os, string
from email.Header import decode_header
import urllib
class Charset:
    """
    Services related to charsets and text encoding
    """

    def __init__(self, charset='iso-8859-1'):
        """
        Charset constructor

        @param charset: international charset code
        """
        self.charset = charset

    def encode(self, orig):
        """
        Encode a string

        The charset based conversion is tried first; on any error the text
        is treated as a mail header and decoded word by word.

        @param orig: original string
        @return: converted text
        """
        try:
            return self.__unicode(orig, self.charset)
        except Exception:
            return self.__decode(orig)

    def __decode(self, orig):
        """
        Decode a string holding mail header encoded words

        @param orig: original string
        @todo: performance this tip
        """

        #tip because decode_header returns the exception
        # ValueError: too many values to unpack
        dest = ''
        for token in orig.split(' '):
            [(text, _encoding)] = decode_header(token)
            if (dest == ''):
                dest = text
            else:
                dest = dest + ' ' + text

        return dest

    def __unicode(self, orig, charset):
        """
        Check that a string can be converted with the given charset

        @param orig: original string
        @param charset: international charset code
        @return: the original string, unchanged
        """
        # NOTE(review): the converted value is discarded and the original
        # string is returned, so the call below only matters for the errors
        # it may raise - confirm whether the converted text was meant to be
        # returned.
        try:
            unicode(orig, charset)
        except TypeError:
            pass

        return orig
def fixCodification(original):
fixed = ""
for word in original.split(" "):
if (len(fixed)>0):
fixed += " "
parted = word.split('?')
if len(parted) >= 4:
codification = parted[1]
cad1 = parted[3]
cad2 = cad1.replace('=','%')
cad3 = cad2.replace('_',' ')
cad4 = unicode((cad3).decode(codification))
cad5 = urllib.unquote(cad4)
fixed += cad5
else:
fixed += word
return fixed
swaml-0.1.1/src/swaml/storage/__init__.py 0000644 0001750 0001750 00000001122 10636037217 017666 0 ustar sergio sergio # SWAML
# Semantic Web Archive of Mailing Lists
#
# Copyright (C) 2005-2006 Sergio Fdez
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 2, or (at your option) any later
# version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
# for more details.
swaml-0.1.1/src/swaml/mail/mbox.py 0000644 0001750 0001750 00000002423 10736740675 016371 0 ustar sergio sergio # -*- coding: utf8 -*-
# SWAML
# Semantic Web Archive of Mailing Lists
#
# Copyright (C) 2005-2006 Sergio Fernández
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 2, or (at your option) any later
# version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
# for more details.
"""Code to work over a mailbox file"""
import os, sys,string, mailbox
class Mbox:
    """
    Wrapper around a mailbox file
    """

    def __init__(self, path):
        """
        Open the mailbox; the program is ended if the file cannot be opened

        @param path: mailbox path
        """
        self.path = path
        try:
            handle = open(self.path)
            self.mbox_file = mailbox.UnixMailbox(handle)
        except IOError:
            print("mbox file does not exist, exiting gracefully")
            sys.exit()

    def nextMessage(self):
        """
        Fetch the following message of the mailbox

        @return: next message at mailbox
        """
        message = self.mbox_file.next()
        return message
swaml-0.1.1/src/swaml/mail/__init__.py 0000644 0001750 0001750 00000001160 10644225305 017142 0 ustar sergio sergio # -*- coding: utf8 -*-
# SWAML
# Semantic Web Archive of Mailing Lists
#
# Copyright (C) 2005-2007 Sergio Fernández
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 2, or (at your option) any later
# version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
# for more details.
swaml-0.1.1/test/swaml/ui/__init__.py 0000644 0001750 0001750 00000001161 10774155402 017032 0 ustar sergio sergio # -*- coding: utf8 -*-
# SWAML
# Semantic Web Archive of Mailing Lists
#
# Copyright (C) 2005-2008 Sergio Fernández
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 2, or (at your option) any later
# version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
# for more details.
swaml-0.1.1/test/swaml/rdf/swse.py 0000644 0001750 0001750 00000002751 10774155402 016420 0 ustar sergio sergio # -*- coding: utf8 -*-
# SWAML
# Semantic Web Archive of Mailing Lists
#
# Copyright (C) 2005-2008 Sergio Fernández
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 2, or (at your option) any later
# version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
# for more details.
import unittest
import sys
sys.path.append("./src")
from swaml.rdf.swse import SWSE
class TestSWSE(unittest.TestCase):
    """
    Test of the SWSE query wrapper using a known foaf:mbox_sha1sum
    """

    def testSergio(self):
        """
        The person owning the given mbox_sha1sum must be found and its
        URI must be the first result
        """
        #every PREFIX needs its namespace IRI, otherwise the query is
        #not valid SPARQL
        query = """
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX foaf: <http://xmlns.com/foaf/0.1/>
SELECT DISTINCT ?person
WHERE {
?file foaf:primaryTopic ?person .
?person rdf:type foaf:Person .
?person foaf:mbox_sha1sum "d0fd987214f56f70b4c47fb96795f348691f93ab"
}
"""
        swse = SWSE()
        results = swse.query(query)
        self.assertTrue(len(results)>0)
        self.assertEquals(results[0], "http://www.wikier.org/foaf#wikier")
if __name__ == "__main__":
unittest.main()
swaml-0.1.1/test/swaml/rdf/__init__.py 0000644 0001750 0001750 00000001161 10774155402 017170 0 ustar sergio sergio # -*- coding: utf8 -*-
# SWAML
# Semantic Web Archive of Mailing Lists
#
# Copyright (C) 2005-2008 Sergio Fernández
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 2, or (at your option) any later
# version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
# for more details.
swaml-0.1.1/test/swaml/rdf/sindice.py 0000644 0001750 0001750 00000003713 11022175771 017052 0 ustar sergio sergio # -*- coding: utf8 -*-
# SWAML
# Semantic Web Archive of Mailing Lists
#
# Copyright (C) 2005-2008 Sergio Fernández
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 2, or (at your option) any later
# version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
# for more details.
import sys
sys.path.append("./src")
import unittest
from rdflib.Graph import ConjunctiveGraph
from rdflib.sparql.bison import Parse
from swaml.rdf.sindice import Sindice
from swaml.rdf.namespaces import RDF, FOAF, NSbindings
class TestSindice(unittest.TestCase):
    """
    Tests of the Sindice lookup wrapper using a known
    foaf:mbox_sha1sum (presumably they need the remote services to be
    reachable)
    """

    def setUp(self):
        """
        Look up the documents that mention the mbox_sha1sum
        """
        sha1sum = "d0fd987214f56f70b4c47fb96795f348691f93ab"
        service = Sindice()
        self.results = service.lookupIFPs("http://xmlns.com/foaf/0.1/mbox_sha1sum", sha1sum)

    def tearDown(self):
        """
        Forget the results of the last lookup
        """
        self.results = None

    def testFirst(self):
        """
        The first document returned must be the expected FOAF file
        """
        first = self.results[0]
        self.assertEquals(first[0], "http://www.wikier.org/foaf.rdf")

    def testQueryingMore(self):
        """
        Parse every returned document and, when it declares a primary
        topic of type foaf:Person, check the URI of that person
        """
        for entry in self.results:
            uri = entry[0]
            graph = ConjunctiveGraph()
            graph.parse(uri)
            sparql = Parse("""
SELECT ?person
WHERE {
<%s> foaf:primaryTopic ?person .
?person rdf:type foaf:Person .
}
""" % uri )
            found = graph.query(sparql, initNs=NSbindings).serialize('python')
            #documents without a matching person are not checked
            if (len(found) == 0):
                continue
            self.assertEquals(str(found[0]), "http://www.wikier.org/foaf#wikier")
if __name__ == "__main__":
unittest.main()
swaml-0.1.1/test/swaml/common/__init__.py 0000644 0001750 0001750 00000001161 10774155402 017705 0 ustar sergio sergio # -*- coding: utf8 -*-
# SWAML
# Semantic Web Archive of Mailing Lists
#
# Copyright (C) 2005-2008 Sergio Fernández
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 2, or (at your option) any later
# version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
# for more details.
swaml-0.1.1/test/swaml/storage/__init__.py 0000644 0001750 0001750 00000001161 10774155402 020061 0 ustar sergio sergio # -*- coding: utf8 -*-
# SWAML
# Semantic Web Archive of Mailing Lists
#
# Copyright (C) 2005-2008 Sergio Fernández
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 2, or (at your option) any later
# version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
# for more details.
swaml-0.1.1/test/swaml/mail/__init__.py 0000644 0001750 0001750 00000001161 10774155402 017337 0 ustar sergio sergio # -*- coding: utf8 -*-
# SWAML
# Semantic Web Archive of Mailing Lists
#
# Copyright (C) 2005-2008 Sergio Fernández
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 2, or (at your option) any later
# version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
# for more details.
swaml-0.1.1/includes/ui/text/usage/foaf.txt 0000644 0001750 0001750 00000000572 10527057355 020202 0 ustar sergio sergio
Usage: foaf.py subscribers-rdf-input [subscribers-rdf-output]
Enriches subscribers RDF file using related FOAF files.
subscribers-rdf-input : path to a subscribers.rdf generated by swaml
subscribers-rdf-output : path to output enriched file
Options:
-h, --help : print this help message and exit.
Report bugs to:
swaml-0.1.1/includes/ui/text/usage/configWizard.txt 0000644 0001750 0001750 00000000346 10531424125 021700 0 ustar sergio sergio
Usage: configWizard.py dest
Helps to write a SWAML configuration file.
dest : destination file
Options:
-h, --help : print this help message and exit.
Report bugs to:
swaml-0.1.1/includes/ui/text/usage/swaml.txt 0000644 0001750 0001750 00000000607 10540744453 020406 0 ustar sergio sergio
Usage: swaml configfile [options]
'swaml' transforms the archives of a mailing list (in mbox format) into a
semantic web friendly format (RDF in XML).
'configfile' : path to a configuration file compatible with RFC822.
Options:
-v, --version : show version.
-h, --help : print this help message and exit.
Report bugs to:
swaml-0.1.1/includes/ui/text/usage/kml.txt 0000644 0001750 0001750 00000000631 10527057355 020046 0 ustar sergio sergio
Usage: kml.py subscribers-rdf-file [output-kml-file]
Export geographic information about SWAML's subscribers, described in RDF,
into KML.
subscribers-rdf-file : path to a subscribers.rdf generated by swaml
output-kml-file : path to output KML file, optional argument
Options:
-h, --help : print this help message and exit.
Report bugs to:
swaml-0.1.1/src/swaml/rdf/sioc/index.py 0000644 0001750 0001750 00000005430 11362021071 017274 0 ustar sergio sergio # -*- coding: utf8 -*-
# SWAML
# Semantic Web Archive of Mailing Lists
#
# Copyright (C) 2005-2006 Sergio Fernández
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 2, or (at your option) any later
# version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
# for more details.
"""Indexing messages"""
import sys, os, string, sha
class Index:
    """
    Messages index: keeps the messages in insertion order and maps
    every Message-Id to the 1-based position of the message
    """

    def __init__(self, config):
        """
        Index constructor

        @param config: reference to the configuration
        """
        self.config = config
        self.items = []
        self.translateIndex = {}

    def add(self, new):
        """
        Add new item

        @param new: new item (must offer getMessageId())
        """
        #store message
        self.items.append(new)

        #and translation
        message_id = new.getMessageId() #FIXME, bug #8295
        if (message_id in self.translateIndex):
            print('Duplicated message id: ' + message_id + ' (see more on bug #8295)')
        #deliberately only we maintain the reference with the most
        # recent message with this id (bug #8295)
        self.translateIndex[message_id] = len(self.items)

    def get(self, id):
        """
        Get message who has an ID

        @param id: message id
        @return: message, or None when the id is unknown
        """
        return self.getMessage(self.__getTranslation(id))

    def getMessage(self, n):
        """
        Get a message by its position

        @param n: message numeric id (1-based)
        @return: message, or None when n is None or out of range
        """
        #positions start at 1: without the lower bound, 0 or a negative
        #number would silently pick a message counting from the end
        if (n is None or n < 1 or n > len(self.items)):
            return None
        return self.items[n-1]

    def getMessageByUri(self, uri):
        """
        Get a message by URI

        @param uri: message uri
        @return: first message with that URI, or None
        """
        for msg in self.items:
            if (uri == msg.getUri()):
                return msg
        return None

    def __getTranslation(self, id):
        """
        Get the reference translation

        @param id: message id
        @return: position of the message, or None when the id is unknown
        """
        return self.translateIndex.get(id)

    def getMessagesUri(self):
        """
        Get all URIs into a list

        @return: messages uris, in insertion order
        """
        return [msg.getUri() for msg in self.items]
swaml-0.1.1/src/swaml/rdf/sioc/subscribers.py 0000644 0001750 0001750 00000031277 11361622661 020536 0 ustar sergio sergio # -*- coding: utf8 -*-
# SWAML
# Semantic Web Archive of Mailing Lists
#
# Copyright (C) 2005-2007 Sergio Fernández
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 2, or (at your option) any later
# version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
# for more details.
"""Subscribers management"""
import sys, os, string
import rdflib
from rdflib.Graph import ConjunctiveGraph
from rdflib import URIRef, Literal, BNode
from rdflib import RDF
from rdflib import Namespace
from swaml.rdf.namespaces import SIOC, RDF, RDFS, FOAF, GEO
from swaml.rdf.sioc.message import Message
from swaml.rdf.foaf import FOAFS
from swaml.rdf.kml import KML
from swaml.common.charset import fixCodification
class Subscriber:
    """
    One mailing list subscriber: name and mail address, the messages
    he sent and the optional data (FOAF, coordinates, picture and
    homepage) attached through the setters
    """

    #class-wide counter, every new subscriber takes the next number
    id = 0

    def __init__(self, name, mail, config):
        """
        Subscriber constructor

        @param name: name
        @param mail: mail address
        @param config: config params
        """
        cls = self.__class__
        cls.id += 1
        self.id = cls.id
        self.config = config
        self.setName(name)
        self.setMail(mail)
        self.mails = []
        self.foaf = None
        self.doc = None
        self.geo = [None, None]
        self.pic = None
        self.homepage = None

    def getName(self):
        """
        Name of the subscriber

        @return: name
        """
        return self.name

    def getMail(self):
        """
        Mail address of the subscriber

        @return: mail
        """
        return self.mail

    def getShaMail(self):
        """
        Sha sum of the mail address, as computed by the FOAF service

        @return: sha1mail
        """
        service = FOAFS()
        return service.getShaMail(self.mail)

    def getFoaf(self):
        """
        FOAF references of the subscriber

        @return: tuple (document url, foaf uri)
        """
        return (self.doc, self.foaf)

    def getSentMails(self):
        """
        URIs of the messages sent by the subscriber

        @return: sent mails list
        """
        return [one.getUri() for one in self.mails]

    def getGeo(self):
        """
        Geographic coordinates

        @return: list [latitude, longitude]
        """
        return self.geo

    def getPic(self):
        """
        Picture of the subscriber

        @return: picture url
        """
        return self.pic

    def getHomepage(self):
        """
        Homepage of the subscriber

        @return: homepage url
        """
        return self.homepage

    def getId(self):
        """
        Numeric id of the subscriber

        @return: id
        """
        return self.id

    def getStringId(self):
        """
        Numeric id prefixed with 's'

        @return: string id
        """
        return "s%s" % (self.getId(),)

    def getUri(self):
        """
        URI of the subscriber, built over the configured base

        @return: subscriber uri
        """
        base = self.config.get('base')
        return base + 'subscriber#' + self.getStringId()

    def setName(self, name):
        """
        Set the name, dropping a pair of enclosing double quotes

        @param name: name
        """
        if (len(name) > 1 and name[0] == '"' and name[-1] == '"'):
            name = name[1:-1]
        self.name = name

    def setMail(self, mail):
        """
        Set the mail address

        @param mail: mail address
        """
        self.mail = mail

    def setFoaf(self, foaf):
        """
        Set the FOAF uri; values not starting with "http://" are ignored

        @param foaf: foaf uri
        """
        if not foaf.startswith("http://"):
            return
        self.foaf = foaf

    def setDoc(self, doc):
        """
        Set the document of the subscriber

        @param doc: doc url
        """
        self.doc = doc

    def addMail(self, new):
        """
        Register another message sent by the subscriber

        @param new: message (must offer getUri())
        """
        self.mails.append(new)

    def setGeo(self, lat, lon):
        """
        Set coordinates

        @param lat: latitude
        @param lon: longitude
        """
        self.geo = [lat, lon]

    def setPic(self, uri):
        """
        Set the picture

        @param uri: picture url
        """
        self.pic = uri

    def setHomepage(self, uri):
        """
        Set the homepage

        @param uri: homepage url
        """
        self.homepage = uri
class Subscribers:
"""
Class to abstract the subscribers management
"""
def __init__(self, config):
"""
Constructor method
@param config: general configuration
"""
self.config = config
self.baseUri = self.config.get('base') + 'subscriber/'
self.subscribers = {}
def add(self, msg):
"""
Add a new subscriber
@param msg: new message
"""
name = fixCodification(msg.getFromName())
mail = msg.getFromMail()
if (not mail in self.subscribers):
self.subscribers[mail] = Subscriber(name, mail, self.config)
self.subscribers[mail].addMail(msg)
self.subscribers[mail].setName(name) #last name?
def get(self, mail):
"""
Get subscriber
@param mail: subscriber's mail address
"""
if (mail in self.subscribers):
return self.subscribers[mail]
else:
return None
def __toRDF(self):
"""
Dump to RDF file all subscribers
"""
if not (os.path.exists(self.config.get('dir'))):
os.mkdir(self.config.get('dir'))
#rdf graph
store = ConjunctiveGraph()
#namespaces
store.bind('sioc', SIOC)
store.bind('foaf', FOAF)
store.bind('rdfs', RDFS)
count = 0
#a Node for each subcriber
for mail, subscriber in self.subscribers.items():
count += 1
user = URIRef(subscriber.getUri())
store.add((user, RDF.type, SIOC['UserAccount']))
store.add((user, SIOC['subscriber_of'], URIRef(self.config.get('base') + 'forum')))
try:
name = subscriber.getName()
if (len(name) > 0):
store.add((user, SIOC['name'], Literal(name) ))
store.add((user, SIOC['email_sha1'], Literal(subscriber.getShaMail())))
if (self.config.get('foaf')):
foafDoc, foafUri = subscriber.getFoaf()
if (foafDoc != None):
store.add((user, RDFS['seeAlso'], URIRef(foafDoc)))
if (foafUri != None):
store.add((user, SIOC['account_of'], URIRef(foafUri)))
#coordinates
lat, lon = subscriber.getGeo()
if (lat != None and lon != None):
store.bind('geo', GEO)
geo = BNode()
store.add((user, FOAF['based_near'], geo))
store.add((geo, RDF.type, GEO['Point']))
store.add((geo, GEO['lat'], Literal(lat)))
store.add((geo, GEO['long'], Literal(lon)))
#depiction
pic = subscriber.getPic()
if (pic != None):
store.add((user, SIOC['avatar'], URIRef(pic)))
#homepage
homepage = subscriber.getHomepage()
if (pic != None):
store.add((user, FOAF['homepage'], URIRef(homepage)))
except UnicodeDecodeError, detail:
print 'Error proccesing subscriber ' + subscriber.getName() + ': ' + str(detail)
sentMails = subscriber.getSentMails()
if (len(sentMails)>0):
for uri in sentMails:
store.add((user, SIOC['creator_of'], URIRef(uri)))
#and dump to disk
try:
rdf_file = open(self.config.get('dir') + 'subscribers.rdf', 'w+')
store.serialize(destination=rdf_file, format="pretty-xml")
rdf_file.flush()
rdf_file.close()
print count, 'subscribers exported in RDF'
except IOError, detail:
print 'Error exporting subscribers to RDF: ' + str(detail)
def __toKML(self):
"""
Public subscribers' geography information,
if it's available in his foaf files,
into KML file
"""
kml = KML()
count = 0
for mail, subscriber in self.subscribers.items():
lat, lon = subscriber.getGeo()
pic = subscriber.getPic()
if ((lat != None) and (lon != None)):
count += 1
kml.addPlace(lat, lon, name=subscriber.getName(), description=pic)
#and dump to disk
try:
kml_file = open(self.config.get('dir') + 'subscribers.kml', 'w+')
kml.write(kml_file)
kml_file.flush()
kml_file.close()
print count, 'subcribers\' coordinates exported in KML'
except IOError, detail:
print 'Error exporting coordinates to KML: ' + str(detail)
def process(self):
"""
Process subscribers to obtain more semantic information
"""
foafserv = FOAFS(config=self.config)
self.foafEnriched = 0
for mail, subscriber in self.subscribers.items():
self.__copileFoafInfo(subscriber, foafserv) #get foaf information
self.__compact(subscriber, foafserv) #compact subscribers lis
#more ideas?
print self.foafEnriched, 'subscribers enriched using FOAF'
def __copileFoafInfo(self, subscriber, foafserv):
"""
Compile subscribers' information from his FOAFs
@param subscriber: subscriber reference
@param foafserv: FOAF service reference
"""
mail = subscriber.getMail()
doc, foaf = foafserv.getFoaf(mail)
if (foaf != None):
subscriber.setFoaf(foaf)
subscriber.setDoc(doc)
self.foafEnriched += 1
#coordinates
lat, lon = foafserv.getGeoPosition(foaf, doc, foafserv.getShaMail(mail))
if (lat != None and lon != None):
subscriber.setGeo(lat, lon)
#picture
pic = foafserv.getPic(foaf, foafserv.getShaMail(mail))
if (pic != None):
subscriber.setPic(pic)
#homepage
homepage = foafserv.getHomepage(foaf, foafserv.getShaMail(mail))
if (homepage != None):
subscriber.setHomepage(homepage)
def __compact(self, subscriber, foafserv):
"""
Compact mailing list subscribers
according his foaf information
@param subscriber: subscriber reference
@param foafserv: FOAF service reference
"""
#diego's idea: look on foaf if the subscriber uses more than one address
pass
def export(self):
"""
Export subscribers information into multiple
formats (RDF and KML)
"""
self.__toRDF()
if (self.config.get('kml')):
self.__toKML()
def getSubscribersUris(self):
"""
Get a list of subscribers' URIs
@return: subscribers uris
"""
uris = []
for mail, subscriber in self.subscribers.items():
uris.append(subscriber.getUri())
return uris
#drop the modules that are not needed anymore from the module namespace;
#'os' is kept because Subscribers.__toRDF uses it when it is called
del sys, string
swaml-0.1.1/src/swaml/rdf/sioc/message.py 0000644 0001750 0001750 00000047324 11361630250 017625 0 ustar sergio sergio # -*- coding: utf8 -*-
# SWAML
# Semantic Web Archive of Mailing Lists
#
# Copyright (C) 2005-2007 Sergio Fernández, Diego Berrueta
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 2, or (at your option) any later
# version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
# for more details.
"""Mail message abstraction"""
import sys, os, string, sha
import datetime, email, email.Errors
from rdflib.Graph import ConjunctiveGraph
from rdflib import URIRef, Literal, BNode
from rdflib import RDF
from swaml.rdf.namespaces import SIOC, RDFS, FOAF, DC, DCT, MVCB, XSD
from swaml.common.charset import Charset, fixCodification
from swaml.common.date import MailDate, FileDate
import xml.dom.minidom
from xml.dom.minidom import getDOMImplementation, DocumentType
class Message:
"""
Mail message abstraction
"""
id = 0
def __init__(self, msg, config, sender=None):
    """
    Message constructor: copies the headers of the mail, takes the
    next numeric id, works out the path and URI where the message
    is published (creating the missing sub-directories under the
    output directory) and reads the body

    @param msg: plain message object
    @param config: configuration reference
    @param sender: author message reference
    """

    #class-wide counter: every new message takes the next number
    self.__class__.id += 1
    self.id = self.__class__.id
    self.config = config
    self.sender = sender
    self.subject = msg['Subject']
    self.messageId = msg['Message-Id']
    self.date = msg['Date']
    self.From = msg['From']
    #parsed 'From' header: getFromName() reads item 0 and
    #getFromMail() reads item 1
    self.getAddressFrom = msg.getaddr('From')
    try:
        self.to = msg['To']
    except:
        #some mails have not a 'to' field
        self.to = self.config.get('to')
    try:
        self.inReplyTo = msg['In-Reply-To']
    except:
        #no 'In-Reply-To' field available
        self.inReplyTo = None
    self.parent = None
    self.childs = []
    #defined outside this excerpt; presumably it sets the swamlId
    #returned by getSwamlId() - TODO confirm
    self.__calculateId()
    self.nextByDate = None
    self.previousByDate = None

    #Obtain message path
    #FIXME: format permited vars (feature #1355)
    index = self.config.get('post')

    #message date
    date = MailDate(self.date)

    #replace vars (the longest month pattern goes first, so that
    # 'MMMM' is not consumed by 'MMM' or 'MM')
    index = index.replace('DD', date.getStringDay()) #day
    index = index.replace('MMMM', date.getLongStringMonth()) #long string month
    index = index.replace('MMM', date.getShortStringMonth()) #short string month
    index = index.replace('MM', date.getStringMonth()) #numeric month
    index = index.replace('YYYY', date.getStringYear()) #year
    index = index.replace('ID', str(self.id)) #swaml id

    #create subdirs: every path component but the last one is a
    #directory, created level by level when it does not exist
    dirs = index.split('/')[:-1]
    index_dir = ''
    for one_dir in dirs:
        index_dir += one_dir + '/'
        if not (os.path.exists(self.config.get('dir')+index_dir)):
            os.mkdir(self.config.get('dir')+index_dir)

    self.dir = index_dir
    self.path = index
    self.uri = self.config.get('base') + self.path

    #body after indexing all messages
    self.body = msg.fp.read()
    #[(self.body, enconding)] = decode_header(msg.fp.read())
def setBody(self, body):
"""
Set body content
@param body: content
"""
try:
self.body = unicode(body) #fixCodification(body)
except:
self.body = ""
def setSender(self, sender):
    """
    Set message's sender

    @param sender: author (toRDF() calls getUri() on it)
    """
    self.sender = sender
def setParent(self, parent):
    """
    Set parent message; only the URI of the parent is stored,
    not the message object

    @param parent: parent reference
    """
    self.parent = parent.getUri()
def addChild(self, child):
    """
    Add new child message; only the URI of the child is stored

    @param child: child reference
    """
    self.childs.append(child.getUri())
def setNextByDate(self, next):
    """
    Set next message by date; only its URI is stored

    @param next: next message reference
    """
    self.nextByDate = next.getUri()
def setPreviousByDate(self, previous):
    """
    Set previous message by date; only its URI is stored

    @param previous: previous message reference
    """
    self.previousByDate = previous.getUri()
def getId(self):
    """
    Get the numeric id given to the message when it was created

    @return: id
    """
    return self.id
def getSwamlId(self):
    """
    Get message SWAML ID

    @return: swaml id
    """
    #NOTE(review): swamlId is not assigned in the visible code,
    #presumably __calculateId() does it - confirm
    return self.swamlId
def getMessageId(self):
    """
    Get the value of the 'Message-Id' header

    @return: id field
    """
    return self.messageId
def getUri(self):
    """
    Get message URI (configured base followed by the message path)

    @return: uri
    """
    return self.uri
def getRdfPath(self):
    """
    Get the path of the RDF file of the message

    @return: configured 'dir' + message path + '.rdf'
    """
    return self.config.get('dir') + self.path + '.rdf'
def getRdfUrl(self):
    """
    Get the URL of the RDF document of the message

    @return: message uri + '.rdf'
    """
    return self.getUri() + '.rdf'
def getXhtmlPath(self):
    """
    Get the path of the XHTML file of the message

    @return: configured 'dir' + message path + '.html'
    """
    return self.config.get('dir') + self.path + '.html'
def getXhtmlUrl(self):
    """
    Get the URL of the XHTML document of the message

    @return: message uri + '.html'
    """
    return self.getUri() + '.html'
def getSender(self):
    """
    Get message sender

    @return: author, or None when it has not been set
    """
    return self.sender
def __parseFrom(self, from_text):
"""
Method to parse from field
@param from_text: from field
"""
from_parted = from_text.split(' ')
name = ' '.join(from_parted[:-1])
mail = from_parted[-1]
return [name, mail]
def getFromName(self):
    """
    Get the name of the author, taken from the from field and
    passed through Charset().encode()

    @return: name
    """
    bracketed = self.From.find('<') >= 0
    if bracketed:
        #header looks like: Name Surname <name@domain.com>
        author = str(self.getAddressFrom[0])
    else:
        #header looks like: Name Surname name@domain.com
        author = self.__parseFrom(self.From)[0]
    return Charset().encode(author)
def getFromMail(self):
"""
Get from mail
@return: mail
"""
if(self.From.find('<')!= -1):
#mail similar than: Name Surmane
return str(self.getAddressFrom[1])
else:
#something like: Name Surmane name@domain.com
from_name, from_mail = self.__parseFrom(self.From)
return from_mail
def getTo(self):
"""
Get To field
@return: to
"""
to = self.to
to = to.replace('@', self.config.getAntiSpam())
to = to.replace('<', '')
to = to.replace('>', '')
return to
def getSubject(self):
    """
    Get subject, passed through Charset().encode()

    @return: subject
    """
    return Charset().encode(self.subject)
def getDate(self):
    """
    Get date, exactly as it came in the 'Date' header

    @return: date string
    """
    return self.date
def getInReplyTo(self):
    """
    Get in-reply-to field

    @return: in-reply-to, None when the constructor could not read it
    """
    return self.inReplyTo
def getParent(self):
    """
    Get parent message

    @return: URI of the parent, None when there is no parent
    """
    return self.parent
def getNextByDate(self):
    """
    Get next message by date

    @return: URI of the next message, None when it has not been set
    """
    return self.nextByDate
def getPreviousByDate(self):
    """
    Get previous message by date

    @return: URI of the previous message, None when it has not been set
    """
    return self.previousByDate
def getBody(self):
    """
    Get message body content

    @return: body
    """
    return self.body
def toRDF(self):
"""
Print a message into RDF in XML format
"""
#rdf graph
store = ConjunctiveGraph()
#namespaces
store.bind('sioc', SIOC)
store.bind('foaf', FOAF)
store.bind('rdfs', RDFS)
store.bind('dc', DC)
store.bind('dct', DCT)
#message node
message = URIRef(self.getUri())
store.add((message, RDF.type, SIOC["Post"]))
#document node
doc = URIRef(self.getUri()+'.rdf')
store.add((doc, RDF.type, FOAF["Document"]))
store.add((doc, FOAF["primaryTopic"], message))
try:
store.add((message, SIOC['id'], Literal(self.getSwamlId())))
store.add((message, SIOC['link'], URIRef(self.getXhtmlUrl())))
store.add((message, SIOC['has_container'],URIRef(self.config.get('base')+'forum')))
store.add((message, SIOC["has_creator"], URIRef(self.getSender().getUri())))
store.add((message, DC['title'], Literal(self.getSubject())))
store.add((message, DCT['created'], Literal(self.getDate(), datatype=XSD[u'dateTime'])))
parent = self.getParent()
if (parent != None):
store.add((message, SIOC['reply_of'], URIRef(parent)))
if (len(self.childs) > 0):
for child in self.childs:
store.add((message, SIOC['has_reply'], URIRef(child)))
previous = self.getPreviousByDate()
if (previous != None):
store.add((message, SIOC['previous_by_date'], URIRef(previous)))
next = self.getNextByDate()
if (next != None):
store.add((message, SIOC['next_by_date'], URIRef(next)))
store.add((message, SIOC['content'], Literal(self.getBody())))
except Exception, detail:
print 'Error proccesing message ' + str(self.getId()) + ': ' + str(detail)
#and dump to disk
try:
rdf_file = open(self.getRdfPath(), 'w+')
rdf_file.write(store.serialize(format="pretty-xml"))
rdf_file.flush()
rdf_file.close()
except IOError, detail:
print 'IOError saving message ' + str(self.getId()) + ': ' + str(detail)
def toXHTML(self):
"""
Print a message into XHTML+RDFa format
"""
#root nodes
doc = getDOMImplementation().createDocument(None, 'html', None)
doctype = DocumentType("html")
doctype.publicId = "-//W3C//DTD XHTML+RDFa 1.0//EN"
doctype.systemId = "http://www.w3.org/MarkUp/DTD/xhtml-rdfa-1.dtd"
doc.doctype = doctype
root = doc.documentElement
root.setAttribute('xmlns', 'http://www.w3.org/1999/xhtml')
root.setAttribute('xmlns:sioc', str(SIOC))
root.setAttribute('xmlns:foaf', str(FOAF))
root.setAttribute('xmlns:dc', str(DC))
root.setAttribute('xmlns:dct', str(DCT))
root.setAttribute('xmlns:mvcb', str(MVCB))
root.setAttribute('xmlns:xsd', str(XSD))
head = doc.createElement('head')
root.appendChild(head)
head.setAttribute('profile', 'http://www.w3.org/2003/g/data-view')
link = doc.createElement('link')
link.setAttribute('rel', 'transformation')
link.setAttribute('href', 'http://www-sop.inria.fr/acacia/soft/RDFa2RDFXML.xsl')
head.appendChild(link)
link = doc.createElement('link')
link.setAttribute('rel', 'meta')
link.setAttribute('type', 'application/rdf+xml')
link.setAttribute('title', 'SIOC')
link.setAttribute('href', self.getRdfUrl())
head.appendChild(link)
link = doc.createElement('link')
link.setAttribute('rel', 'stylesheet')
link.setAttribute('type', 'text/css')
link.setAttribute('href', self.config.get('base')+'swaml.css')
head.appendChild(link)
title = doc.createElement('title')
title.appendChild(doc.createTextNode(self.getSubject()))
head.appendChild(title)
#body
body = doc.createElement('body')
body.setAttribute('typeof', 'foaf:Document')
body.setAttribute('about', self.getXhtmlUrl())
root.appendChild(body)
p = doc.createElement('p')
span = doc.createElement('span')
span.setAttribute('rel', 'foaf:primaryTopic')
span.setAttribute('href', self.getUri())
body.appendChild(p)
p.appendChild(span)
#post div
div = doc.createElement('div')
body.appendChild(div)
div.setAttribute('typeof', 'sioc:Post')
div.setAttribute('about', self.getUri())
#post fields
try:
h1 = doc.createElement('h1')
div.appendChild(h1)
h1.setAttribute('property', 'dc:title')
h1.appendChild(doc.createTextNode(self.getSubject()))
p = doc.createElement('p')
div.appendChild(p)
strong = doc.createElement('strong')
p.appendChild(strong)
strong.appendChild(doc.createTextNode('From: '))
a = doc.createElement('a')
a.setAttribute('rel', 'sioc:has_creator')
a.setAttribute('href', self.getSender().getUri())
a.appendChild(doc.createTextNode(self.getSender().getName()))
p.appendChild(a)
p = doc.createElement('p')
div.appendChild(p)
strong = doc.createElement('strong')
p.appendChild(strong)
strong.appendChild(doc.createTextNode('To: '))
a = doc.createElement('a')
a.setAttribute('rel', 'sioc:has_container')
a.setAttribute('href', self.config.get('base')+'forum')
if (len(self.config.get('title'))>0):
a.appendChild(doc.createTextNode(self.config.get('title')))
else:
a.appendChild(doc.createTextNode(self.config.get('base')+'forum'))
p.appendChild(a)
p = doc.createElement('p')
div.appendChild(p)
strong = doc.createElement('strong')
p.appendChild(strong)
strong.appendChild(doc.createTextNode('Date: '))
span = doc.createElement('span')
span.setAttribute('property', 'dct:created')
span.setAttribute('datatype', 'xsd:dateTime')
span.appendChild(doc.createTextNode(self.getDate()))
p.appendChild(span)
#p = doc.createElement('p')
#div.appendChild(p)
#strong = doc.createElement('strong')
#p.appendChild(strong)
#strong.appendChild(doc.createTextNode('Message-Id: '))
#span = doc.createElement('span')
#span.setAttribute('property', 'sioc:id')
#span.appendChild(doc.createTextNode(self.getSwamlId()))
#p.appendChild(span)
pre = doc.createElement('pre')
div.appendChild(pre)
pre.setAttribute('property', 'sioc:content')
pre.appendChild(doc.createTextNode(self.getBody())) #FIXME: parse URLs
p = doc.createElement('p')
div.appendChild(p)
p.appendChild(doc.createTextNode('URI: '))
a = doc.createElement('a')
a.setAttribute('href', self.getUri())
a.appendChild(doc.createTextNode(self.getUri()))
p.appendChild(a)
p = doc.createElement('p')
div.appendChild(p)
p.appendChild(doc.createTextNode('Link: '))
a = doc.createElement('a')
a.setAttribute('rel', 'sioc:link')
a.setAttribute('href', self.getXhtmlUrl())
a.appendChild(doc.createTextNode(self.getXhtmlUrl()))
p.appendChild(a)
parent = self.getParent()
if (parent != None):
p = doc.createElement('p')
div.appendChild(p)
p.appendChild(doc.createTextNode('Reply of: '))
a = doc.createElement('a')
a.setAttribute('rel', 'sioc:reply_of')
a.setAttribute('href', parent)
a.appendChild(doc.createTextNode(parent))
p.appendChild(a)
if (len(self.childs) > 0):
for child in self.childs:
p = doc.createElement('p')
div.appendChild(p)
p.appendChild(doc.createTextNode('Has reply: '))
a = doc.createElement('a')
a.setAttribute('rel', 'sioc:has_reply')
a.setAttribute('href', child)
a.appendChild(doc.createTextNode(child))
p.appendChild(a)
previous = self.getPreviousByDate()
if (previous != None):
p = doc.createElement('p')
div.appendChild(p)
p.appendChild(doc.createTextNode('Previous by Date: '))
a = doc.createElement('a')
a.setAttribute('rel', 'sioc:previous_by_date')
a.setAttribute('href', previous)
a.appendChild(doc.createTextNode(previous))
p.appendChild(a)
next = self.getNextByDate()
if (next != None):
p = doc.createElement('p')
div.appendChild(p)
p.appendChild(doc.createTextNode('Next by Date: '))
a = doc.createElement('a')
a.setAttribute('rel', 'sioc:next_by_date')
a.setAttribute('href', next)
a.appendChild(doc.createTextNode(next))
p.appendChild(a)
except Exception, detail:
print 'Error exporting to XHTML message ' + str(self.getId()) + ': ' + str(detail)
#credits
p = doc.createElement('p')
body.appendChild(p)
p.setAttribute('class', 'credits')
a = doc.createElement('a')
a.setAttribute('rel', 'mvcb:generatorAgent')
a.setAttribute('href', 'http://swaml.berlios.de/')
a.appendChild(doc.createTextNode('Generated by '))
abbr = doc.createElement('abbr')
abbr.setAttribute('title', 'Semantic Web Archives of Mailing Lists')
abbr.appendChild(doc.createTextNode('SWAML'))
a.appendChild(abbr)
p.appendChild(a)
#and dump to disk
try:
xhtml_file = open(self.getXhtmlPath(), 'w+') #FIXME
try:
xhtml_file.write(doc.toprettyxml(encoding="utf-8"))
except UnicodeDecodeError, detail:
xhtml_file.write("")
print 'Decode error saving message ' + str(self.getId()) + ': ' + str(detail)
xhtml_file.flush()
xhtml_file.close()
except IOError, detail:
print 'IOError saving message ' + str(self.getId()) + ': ' + str(detail)
def __calculateId(self):
    """
    Calculate SWAML ID

    The ID is the SHA-1 hex digest of the string
    '<messageId>-<date>-swaml-<id>' built from the instance fields
    messageId, date and id; the result is stored in self.swamlId.

    @todo: obtain a better SWAML ID
    """

    # hashlib replaces the 'sha' module (deprecated since Python 2.5 and
    # removed in Python 3); sha1 yields the same digest as sha.new().
    import hashlib

    #id: hashcode of 'MessageId - Date + ID'
    seed = self.messageId + '-' + self.date + '-swaml-' + str(self.id)

    # Hash functions work on bytes: encode text explicitly so that
    # non-ASCII characters in the headers cannot abort the computation.
    if not isinstance(seed, bytes):
        seed = seed.encode('utf-8')

    self.swamlId = hashlib.sha1(seed).hexdigest()
swaml-0.1.1/src/swaml/rdf/sioc/__init__.py 0000644 0001750 0001750 00000001160 10644225305 017730 0 ustar sergio sergio # -*- coding: utf8 -*-
# SWAML
# Semantic Web Archive of Mailing Lists
#
# Copyright (C) 2005-2007 Sergio Fernández
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 2, or (at your option) any later
# version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
# for more details.
swaml-0.1.1/src/swaml/rdf/sioc/mailinglist.py 0000644 0001750 0001750 00000023276 11362021137 020514 0 ustar sergio sergio # -*- coding: utf8 -*-
# SWAML
# Semantic Web Archive of Mailing Lists
#
# Copyright (C) 2005-2010 Sergio Fernández
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 2, or (at your option) any later
# version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
# for more details.
"""Abstraction of a mailing list"""
import sys, os
import random
import datetime
from rdflib.Graph import ConjunctiveGraph
from rdflib import URIRef, Literal, BNode
from swaml.mail.mbox import Mbox
from swaml.rdf.sioc.subscribers import Subscribers
from swaml.rdf.sioc.message import Message
from swaml.rdf.sioc.index import Index
from swaml.rdf.namespaces import RDF, RDFS, SWAML, SIOC, SIOCT, FOAF, DC, MVCB
from swaml.common.date import FileDate
from shutil import copyfile
class MailingList:
    """
    Mailing List abstraction

    Reads the mbox named by the 'mbox' configuration key, exports each
    message and the list itself, and writes the supporting files (style
    sheet and .htaccess) into the directory named by the 'dir' key.
    """

    def __init__(self, config, base="./", lang=None):
        """
        Constructor method

        @param config: configuration (values are read with config.get(key))
        @param base: prefix prepended to the 'includes/...' resource paths;
                     it is concatenated as-is, so it must end with '/'
        @param lang: language
        """

        self.config = config
        self.base = base
        self.lang = lang
        self.subscribers = Subscribers(config)
        self.index = Index(self.config)
        self.uri = self.config.get('base') + 'forum'

    def __createDir(self):
        """
        Create the necessary directory
        """

        target = self.config.get('dir')
        if not os.path.exists(target):
            # makedirs also creates missing parent directories, where a
            # plain mkdir would fail
            os.makedirs(target)

    def __parse(self):
        """
        Parse mailing list and load all indexes into memory

        Stores the number of messages read from the mbox in self.messages.
        """

        previous = None
        mbox = Mbox(self.config.get('mbox'))
        messages = 0

        message = mbox.nextMessage()
        while message is not None:
            try:
                #first load message
                msg = Message(message, self.config)

                #index it
                self.index.add(msg)
                self.subscribers.add(msg)
                subscriber = self.subscribers.get(msg.getFromMail())
                msg.setSender(subscriber)

                #parent message (refactor)
                inReplyTo = msg.getInReplyTo()
                if inReplyTo is not None:
                    parent = self.index.get(inReplyTo)
                    if parent is not None:
                        msg.setParent(parent)  #link child with parent
                        parent.addChild(msg)   #and parent with child

                #and previous and next by date
                if previous is not None:
                    previous.setNextByDate(msg)
                    msg.setPreviousByDate(previous)

                previous = msg
            except KeyError as details:
                print('Error parsing a mail form mailbox: ' + str(details))

            #count every message read, even the ones that failed to parse
            messages += 1

            #and continue with next message
            message = mbox.nextMessage()

        self.messages = messages

    def publish(self):
        """
        Publish the messages

        @return: number of messages read during the export pass
        """

        self.__createDir()

        #first lap
        self.__parse()

        #and second lap
        mbox = Mbox(self.config.get('mbox'))
        messages = 0

        message = mbox.nextMessage()
        while message is not None:
            try:
                messages += 1

                #only warn when the Message-Id header is missing; the
                #message is looked up by position, so its id is not needed
                try:
                    message['Message-Id']
                except Exception:
                    print(str(messages) + " is not a valid RFC2822 message, it hasn't message-id field")

                msg = self.index.getMessage(messages)
                msg.setBody(message.fp.read())
                msg.toRDF()
                msg.toXHTML()
            except Exception as detail:
                print('Error processing message ' + str(messages) + ': ' + str(detail))

            message = mbox.nextMessage()

        self.__toRDF()
        self.__toXHTML()

        if self.config.get('foaf'):
            self.subscribers.process()

        self.subscribers.export()

        self.copyFiles()
        self.generateApacheConf()

        if self.messages != messages:
            print('Something was wrong: ' + str(self.messages) + ' parsed, but ' + str(messages) + ' processed')

        return messages

    def __getUri(self):
        """
        Get the mailing list URI

        @return: uri
        """

        return self.uri

    def __addSite(self, graph, url):
        """
        Add the site

        @param graph: mailing list graph
        @param url: site url
        @todo: write a new class
        """

        site = URIRef(url)
        graph.add((site, RDF.type, SIOC['Site']))
        graph.add((site, SIOC['host_of'], URIRef(self.__getUri())))

    def __toRDF(self):
        """
        Dump mailing list into a RDF file

        The graph is written to '<dir>forum.rdf'.
        """

        #rdf graph
        store = ConjunctiveGraph()

        #namespaces
        store.bind('rdfs', RDFS)
        store.bind('swaml', SWAML)
        store.bind('sioc', SIOC)
        store.bind('sioct', SIOCT)
        store.bind('foaf', FOAF)
        store.bind('dc', DC)
        store.bind('mvcb', MVCB)

        #first the host graph
        host = self.config.get('host')
        if host:
            self.__addSite(store, host)

        #and then the mailing list
        forum = URIRef(self.__getUri())
        store.add((forum, RDF.type, SIOC['Forum']))

        #list information
        title = self.config.get('title')
        if title:
            store.add((forum, DC['title'], Literal(title)))

        description = self.config.get('description')
        if description:
            store.add((forum, DC['description'], Literal(description)))

        if host:
            store.add((forum, SIOC['has_host'], URIRef(host)))

        store.add((forum, SWAML['address'], Literal(self.config.get('to'))))
        store.add((forum, DC['date'], Literal(FileDate(self.config.get('mbox')).getStringFormat())))
        store.add((forum, MVCB['generatorAgent'], URIRef(self.config.getAgent())))
        store.add((forum, MVCB['errorReportsTo'], URIRef('http://swaml.berlios.de/bugs')))
        if self.lang is not None:
            store.add((forum, DC['language'], Literal(self.lang)))

        #subscribers
        for uri in self.subscribers.getSubscribersUris():
            store.add((forum, SIOC['has_subscriber'], URIRef(uri)))
            store.add((URIRef(uri), RDF.type, SIOC['UserAccount']))

        #and all messages
        for msg in self.index.items:
            uri = msg.getUri()
            store.add((forum, SIOC['container_of'], URIRef(uri)))
            store.add((URIRef(uri), RDF.type, SIOC['Post']))
            parent = msg.getParent()
            if parent is not None:
                store.add((URIRef(uri), SIOC['reply_of'], URIRef(parent)))

        #and dump to disk
        try:
            rdf_file = open(self.config.get('dir') + 'forum.rdf', 'w+')
            try:
                rdf_file.write(store.serialize(format="pretty-xml"))
                rdf_file.flush()
            finally:
                #release the handle even when serializing or writing fails
                rdf_file.close()
        except IOError as detail:
            print('Error exporting mailing list to RDF: ' + str(detail))

    def __toXHTML(self):
        """
        Placeholder: nothing is exported for the list itself
        """

        pass

    def copyFiles(self):
        """
        Copy necessary files
        """

        copyfile(self.base + 'includes/ui/web/swaml.css', self.config.get('dir') + 'swaml.css')

    def generateApacheConf(self):
        """
        Generate a customized htaccess file

        Reads the template '<base>includes/apache/htaccess-files.tpl',
        fills in the {BASE}, {POSTURI} and {POSTFILE} placeholders and
        writes the result to '<dir>.htaccess'. When the template cannot
        be read a message is printed and an empty file is written.
        """

        #read template
        data = ''
        try:
            template = open(self.base + 'includes/apache/htaccess-files.tpl')
            try:
                for line in template:
                    data += line
            finally:
                template.close()
        except Exception:
            print('An exception occured reading apache template file')

        #keep only the path part of the base URI (what follows scheme://host)
        base = self.config.get('base')
        base = '/' + '/'.join(base.split('/')[3:])
        data = data.replace('{BASE}', base)

        #post/([0-9]{4}\-[A-Za-z]+/[0-9]+)$
        #RewriteRule ^post/([0-9]{4})-([A-Za-z]+)/([0-9]+)$ $1-$2/post-$3.xhtml [R=303]

        #escape the literal hyphens BEFORE expanding the date tokens, so the
        #range hyphens inside [0-9] and [A-Za-z] are left untouched
        pattern = self.config.get('post').replace('-', '\\-')
        tokens = (
            ('DD', '[0-9]{2}'),
            ('MMMM', '[A-Za-z]{4}'),
            ('MMM', '[A-Za-z]{3}'),
            ('MM', '[0-9]{2}'),
            ('YYYY', '[0-9]{4}'),
            ('ID', '[0-9]+'),
        )
        for token, regex in tokens:
            pattern = pattern.replace(token, regex)

        data = data.replace('{POSTURI}', '(' + pattern + ')')
        data = data.replace('{POSTFILE}', '$1')

        #and dump to disk
        try:
            htaccess = open(self.config.get('dir') + '.htaccess', 'w+')
            try:
                htaccess.write(data)
                htaccess.flush()
            finally:
                htaccess.close()
        except IOError:
            print('IOError saving message .htaccess file')
swaml-0.1.1/test/swaml/rdf/sioc/__init__.py 0000644 0001750 0001750 00000001161 10774155402 020125 0 ustar sergio sergio # -*- coding: utf8 -*-
# SWAML
# Semantic Web Archive of Mailing Lists
#
# Copyright (C) 2005-2008 Sergio Fernández
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 2, or (at your option) any later
# version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
# for more details.