305 lines
11 KiB
JavaScript
305 lines
11 KiB
JavaScript
{
	"translatorID": "b1c90b99-2e1a-4374-a03b-92e45f1afc55",
	"label": "Radio Free Europe / Radio Liberty",
	"creator": "Avram Lyon",
	"target": "^https?://(www\\.rferl\\.org/|www\\.azatliq\\.org/|www\\.azattyq\\.org/|rus\\.azattyq\\.org/|da\\.azadiradio\\.org/|pa\\.azadiradio\\.org/|www\\.azattyk\\.org/|www\\.ozodi\\.org/|www\\.ozodlik\\.org/|www\\.evropaelire\\.org/|www\\.slobodnaevropa\\.org/|www\\.makdenes\\.org/|www\\.iraqhurr\\.org/|www\\.radiofarda\\.com/|www\\.azatutyun\\.am/|www\\.azadliq\\.org/|www\\.svaboda\\.org/|www\\.svoboda\\.org/|www\\.tavisupleba\\.org/|www\\.azathabar\\.com/|www\\.svobodanews\\.ru/|www\\.europalibera\\.org/|www\\.radiosvoboda\\.org/)",
	"minVersion": "2.1.9",
	"maxVersion": "",
	"priority": 100,
	"inRepository": true,
	"translatorType": 4,
	"browserSupport": "gcsbv",
	"lastUpdated": "2014-04-04 10:15:37"
}
|
||
|
||
/*
|
||
***** BEGIN LICENSE BLOCK *****
|
||
|
||
Radio Liberty Translator
|
||
Copyright © 2009-2011 Avram Lyon, ajlyon@gmail.com
|
||
|
||
This file is part of Zotero.
|
||
|
||
Zotero is free software: you can redistribute it and/or modify
|
||
it under the terms of the GNU Affero General Public License as published by
|
||
the Free Software Foundation, either version 3 of the License, or
|
||
(at your option) any later version.
|
||
|
||
Zotero is distributed in the hope that it will be useful,
|
||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||
GNU Affero General Public License for more details.
|
||
|
||
You should have received a copy of the GNU Affero General Public License
|
||
along with Zotero. If not, see <http://www.gnu.org/licenses/>.
|
||
|
||
***** END LICENSE BLOCK *****
|
||
*/
|
||
|
||
|
||
/*
 This translator works on articles posted on the websites of Radio Free Europe / Radio Liberty.
 It imports the basic metadata the site provides, from normal article pages and from search
 result pages.

 The translator tries to work on all of the languages of RFE/RL; they should all work.

 Editions:
	English:			http://www.rferl.org/
	Tatar/Bashkir:		http://www.azatliq.org/
	Kazakh:				http://www.azattyq.org/ (Kazakh)
						http://rus.azattyq.org/ (Russian)
	Afghan:				http://da.azadiradio.org/ (Dari)
						http://pa.azadiradio.org/ (Pashto)
	Kirghiz:			http://www.azattyk.org/
	Tajik:				http://www.ozodi.org/
	Uzbek:				http://www.ozodlik.org/
	Albanian:			http://www.evropaelire.org/
	Bosnian/Montenegrin/Serbian:
						http://www.slobodnaevropa.org/
	Macedonian:			http://www.makdenes.org/
	Iraqi Arabic:		http://www.iraqhurr.org/
	Farsi:				http://www.radiofarda.com/
	Armenian:			http://www.azatutyun.am/
	Azerbaijani:		http://www.azadliq.org/
	Belarusian:			http://www.svaboda.org/
	Georgian:			http://www.tavisupleba.org/
	Turkmen:			http://www.azathabar.com/
	Russian:			http://www.svobodanews.ru/ and http://www.svoboda.org/
	Moldovan:			http://www.europalibera.org/ (Romanian)
	Ukrainian:			http://www.radiosvoboda.org/

 This translator does not yet attempt to work with the video files that Radio Liberty
 hosts and produces; work with them must be left for a future revision.

 It does try to save the audio file linked from a story, but still does nothing
 for video content.

 Another future improvement would be the facility to import from the front page and subject
 pages. This is not yet possible.

 Some of the services use non-standard ways of marking authorship. For example, the Pashto
 edition places the author at the bottom of the article, but there is no clear way to scrape
 that information, so the translator does not load it.
*/
|
||
|
||
// Item under construction. It is declared at file level because addAudio(),
// which runs as an asynchronous doGet callback, attaches the audio file to
// it and then completes it.
var item;
|
||
/**
 * Classifies the page at `url` for Zotero.
 *
 * @param {Document} doc  Document of the page (not inspected here).
 * @param {String} url    URL of the page.
 * @return {String|undefined} "newspaperArticle" for article and news-archive
 *         URLs, "multiple" for a search-result URL with a non-empty query,
 *         and undefined for every other URL.
 */
function detectWeb(doc, url){
	// The translator uses this type because RFE/RL generally has a place of publication
	// and a Section; both are specific to newspaperArticle.
	if (/\/content\/|\/archive\/news|\/archive\/ru_news_zone/.test(url)) {
		return "newspaperArticle";
	}
	// A search page only counts when a search term follows "k=".
	if (/\/search\/\?k=.+/.test(url)) {
		return "multiple";
	}
}
|
||
|
||
/**
 * Saves the current page: either the article itself, or the articles the
 * user picks from a search-result page.
 *
 * @param {Document} doc  Document of the page being translated.
 * @param {String} url    URL of that page.
 */
function doWeb(doc, url){
	// Host name -> value stored in item.language.
	var languageByHost = {
		"www.rferl.org" : "English",
		"www.azatliq.org" : "Tatar/Bashkir",
		"www.azattyq.org" : "Kazakh",
		"rus.azattyq.org" : "Russian",
		"da.azadiradio.org" : "Dari",
		"pa.azadiradio.org" : "Pashto",
		"www.azattyk.org" : "Kirghiz",
		"www.ozodi.org" : "Tajik",
		"www.ozodlik.org" : "Uzbek",
		"www.evropaelire.org" : "Albanian",
		"www.slobodnaevropa.org" : "Bosnian/Montenegrin/Serbian",
		"www.makdenes.org" : "Macedonian",
		"www.iraqhurr.org" : "Iraqi Arabic",
		"www.radiofarda.com" : "Farsi",
		"www.azatutyun.am" : "Armenian",
		"www.azadliq.org" : "Azerbaijani",
		"www.svaboda.org" : "Belarussian",
		"www.tavisupleba.org" : "Georgian",
		"www.azathabar.com" : "Turkmen",
		"www.svobodanews.ru" : "Russian",
		"www.svoboda.org" : "Russian",
		"www.europalibera.org" : "Romanian",
		"www.radiosvoboda.org" : "Ukrainian"
	};

	/**
	 * Returns the first node matching `xpath` below `context` in document
	 * `root`, or null when nothing matches.
	 */
	function firstNode(root, xpath, context) {
		return root.evaluate(xpath, context, null, XPathResult.ANY_TYPE, null).iterateNext();
	}

	if (detectWeb(doc, url) == "multiple") {
		var results = doc.evaluate('//div[@class="searchResultItem"]', doc, null, XPathResult.ANY_TYPE, null);
		// URL -> title, offered to the user by Zotero.selectItems
		var choices = {};
		var result;
		while ((result = results.iterateNext())) {
			var link = firstNode(doc, './a[@class="resultLink"]', result);
			// Skip a result block that carries no article link
			if (!link) continue;
			choices[link.href] = link.textContent;
		}
		Zotero.selectItems(choices, function (selected) {
			if (!selected) {
				return true;
			}
			var articles = [];
			for (var href in selected) {
				articles.push(href);
			}
			Zotero.Utilities.processDocuments(articles, scrape);
		});
	} else {
		scrape(doc, url);
	}

	/**
	 * Builds one newspaperArticle item from an article page and completes
	 * it, directly or through addAudio() when the page links to audio.
	 *
	 * @param {Document} doc  Document of the article page.
	 * @param {String} url    URL of the article page.
	 */
	function scrape(doc, url){
		// The item is kept in a local variable: the audio request below is
		// asynchronous, and scrape() may run again for another article
		// before it returns.
		var article = new Zotero.Item("newspaperArticle");
		article.title = Zotero.Utilities.trimInternal(firstNode(doc, '//h1', doc).textContent);

		var authorNode = firstNode(doc, '//div[@id="article"]//div[@class="author"]', doc);
		var author = authorNode ? Zotero.Utilities.trimInternal(authorNode.textContent) : "";
		// Sometimes we have "By Author"
		if (author.substr(0, 3).toLowerCase() == "by ") {
			author = author.substr(3);
		}
		// An empty author block must not produce an empty creator
		if (author) {
			var cleaned = Zotero.Utilities.cleanAuthor(author, "author");
			// If we have only one name, set the author to one-name mode
			if (cleaned.firstName == "") {
				cleaned["fieldMode"] = true;
			} else {
				// We can check for all lower-case and capitalize if necessary
				// All-uppercase is handled by cleanAuthor
				if (cleaned.firstName == cleaned.firstName.toLowerCase()) {
					cleaned.firstName = Zotero.Utilities.capitalizeTitle(cleaned.firstName, true);
				}
				if (cleaned.lastName == cleaned.lastName.toLowerCase()) {
					cleaned.lastName = Zotero.Utilities.capitalizeTitle(cleaned.lastName, true);
				}
			}
			article.creators.push(cleaned);
		}

		// The section should _always_ be present
		article.section = ZU.xpathText(doc, '//div[@id="article" or contains(@class, "middle_content")]/h2');

		// This exposes a limitation of Zotero's date handling; the Afghan services
		// use the Hijri calendar, and mixed sorting looks funny-- I'd like to be able
		// to mark such dates to be handled appropriately
		var dateNode = firstNode(doc, '//div[@id="article"]//p[@class="article_date"]', doc);
		if (dateNode) {
			// sometimes not present
			article.date = Zotero.Utilities.trimInternal(dateNode.textContent);
		}

		// We can also try to derive the location-- if the byline can be parsed
		// Here, we assume that the byline uses all-caps for the location
		// TODO Use more general all-caps character class, since this excludes special
		// characters that may occur in city names.
		// This all-caps class is borrowed from utilities.js and augmented by
		// the basic Cyrillic capital letters.
		var bodyNode = firstNode(doc, '//div[@id="article"]//div[@class="zoomMe"]', doc);
		if (bodyNode) {
			var text = bodyNode.textContent;
			// First form: "PLACE (Wire service) --"; fallback: "PLACE --"
			var hits = text.match(/([A-ZА-Я \u0400-\u042f]+) \((.*)\) --/)
				|| text.match(/([A-ZА-Я \u0400-\u042f]+) --/);
			if (hits) {
				// The character class admits spaces, so the capture can carry padding
				article.place = Zotero.Utilities.capitalizeTitle(hits[1].trim(), true);
				// We add the wire service as an author; it would be nice to have a field for it.
				// The fallback form has no second group, so there is nothing to add then.
				if (hits[2]) {
					article.creators.push({lastName : hits[2], creatorType:"author", fieldMode:true});
				}
			}
		}

		// Fall back to the document's own address if no URL was handed in
		article.url = url || doc.location.href;
		article.publicationTitle = firstNode(doc, '//h2[@id="header_logo_anchor" or @id="header_logo"]//span', doc).textContent.trim();

		var host = doc.location.href.match(/https?:\/\/([^/]+)/);
		if (host) {
			article.language = languageByHost[host[1]];
		}

		// The printable version ("articleprintview") doesn't save nicely,
		// unfortunately, so the snapshot is taken from the regular page.
		article.attachments.push({url:url, title: (article.publicationTitle + " Snapshot"), mimeType:"text/html"});

		var listenLink = firstNode(doc, '//li[@class="listenlink"]/a', doc);
		if (listenLink) {
			Zotero.Utilities.doGet(listenLink.href, function (text) {
				// addAudio() works on the file-level `item`; point it at this
				// article immediately before the call.
				item = article;
				addAudio(text);
			}, null);
		} else {
			article.complete();
		}
	}
}
|
||
|
||
/**
 * doGet callback for the "listen" page of an article: attaches the first
 * RFE/RL MP3 URL found in the page to the file-level `item`, then completes
 * that item whether or not an audio URL was found.
 *
 * @param {String} text  Source of the page the listen link points to.
 */
function addAudio(text) {
	// e.g. http://realaudio.rferl.org/TB/2011/03/29/20110329-183936-TB-clip.mp3
	// Quotes, whitespace and angle brackets end the URL, so the match cannot
	// run on to a later ".mp3" elsewhere in the page.
	var audio = text.match(/https?:\/\/(realaudio|audioarchive)\.rferl\.org[^"'\s<>]*\.mp3/);
	if (audio) {
		item.attachments.push({url:audio[0], mimeType:"application/octet-stream", title:"RFE/RL Audio"});
	}
	item.complete();
}
|
||
|
||
/** BEGIN TEST CASES **/
|
||
// Expected results for the translator's regression tests. The snapshot
// title is the trimmed publication title followed by " Snapshot", so it
// never starts with a space.
var testCases = [
	{
		"type": "web",
		"url": "http://www.azatliq.org/content/article/24281041.html",
		"items": [
			{
				"itemType": "newspaperArticle",
				"creators": [
					{
						"firstName": "Гүзәл",
						"lastName": "Мәхмүтова",
						"creatorType": "author"
					}
				],
				"notes": [],
				"tags": [],
				"seeAlso": [],
				"attachments": [
					{
						"url": false,
						"title": "Азатлык Радиосы Snapshot",
						"mimeType": "text/html"
					},
					{
						"url": false,
						"mimeType": "application/octet-stream",
						"title": "RFE/RL Audio"
					}
				],
				"title": "Татар яшьләре татарлыкны сакларга тырыша",
				"section": "татарстан",
				"date": "29.07.2011",
				"url": "http://www.azatliq.org/content/article/24281041.html",
				"publicationTitle": "Азатлык Радиосы",
				"language": "Tatar/Bashkir",
				"libraryCatalog": "Radio Free Europe / Radio Liberty"
			}
		]
	},
	{
		"type": "web",
		"url": "http://www.svoboda.org/content/news/24382010.html",
		"items": [
			{
				"itemType": "newspaperArticle",
				"creators": [],
				"notes": [],
				"tags": [],
				"seeAlso": [],
				"attachments": [
					{
						"title": "Радио Свобода Snapshot",
						"mimeType": "text/html"
					}
				],
				"title": "Партия \"Яблоко\" перевела свою предвыборную программу на 18 языков",
				"section": "Новости",
				"date": "Опубликовано 05.11.2011 06:49",
				"url": "http://www.svoboda.org/content/news/24382010.html",
				"publicationTitle": "Радио Свобода",
				"language": "Russian",
				"libraryCatalog": "Radio Free Europe / Radio Liberty",
				"accessDate": "CURRENT_TIMESTAMP"
			}
		]
	}
]
|
||
/** END TEST CASES **/ |