825 lines
25 KiB
825 lines
25 KiB
"translatorID": "fc9b7700-b3cc-4150-ba89-c7e4443bd96d",
"label": "Financial Times",
"creator": "Sebastian Karcher",
"target": "^https?://(www|search|ftalphaville)\\.ft\\.com",
"minVersion": "2.1.9",
"maxVersion": "",
"priority": 100,
"inRepository": true,
"translatorType": 4,
"browserSupport": "gcsv",
"lastUpdated": "2016-09-19 22:03:34"
/*********************** BEGIN FRAMEWORK ***********************/
/**
    Copyright (c) 2010-2013, Erik Hetzner

    This program is free software: you can redistribute it and/or
    modify it under the terms of the GNU Affero General Public License
    as published by the Free Software Foundation, either version 3 of
    the License, or (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
    Affero General Public License for more details.

    You should have received a copy of the GNU Affero General Public
    License along with this program. If not, see
    <http://www.gnu.org/licenses/>.
*/
/**
 * Flatten an arbitrarily nested array into one flat array,
 * e.g. [[1], [2, 3]] -> [1, 2, 3].
 *
 * @param {Array} a - the (possibly nested) array; null/undefined yields []
 * @returns {Array} a new array with the non-array leaves in their original order
 */
function flatten(a) {
    var retval = [];
    if (a === undefined || a === null) {
        return retval;
    }
    for (var i = 0; i < a.length; i++) {
        /* skip holes in sparse arrays, as enumerating the keys would */
        if (!Object.prototype.hasOwnProperty.call(a, i)) { continue; }
        var entry = a[i];
        /* Array.isArray also recognises arrays created in another global */
        if (Array.isArray(entry)) {
            retval = retval.concat(flatten(entry));
        } else {
            retval.push(entry);
        }
    }
    return retval;
}
/**
 * Namespace object for the translator framework.
 * _scrapers holds every scraper registered through FW.Scraper and
 * FW.MultiScraper, in registration order.
 */
var FW = {
    _scrapers : []
};
/**
 * Base constructor whose instances serve as the prototype of the
 * scraper types. Provides hook dispatch, evaluation of config values
 * and a default makeItems.
 */
FW._Base = function () {
    /**
     * Call the function stored under this.hooks[hookName], if there is one.
     * Does nothing when no hooks object or no such hook is configured.
     */
    this.callHook = function (hookName, item, doc, url) {
        var hooks = this['hooks'];
        /* typeof null is 'object', so exclude it explicitly */
        if (hooks !== null && typeof hooks === 'object') {
            var hook = hooks[hookName];
            if (typeof hook === 'function') {
                hook(item, doc, url);
            }
        }
    };

    /**
     * Resolve a config value against a page.
     *  - array: every entry is resolved recursively and the result flattened
     *  - other object: its evaluate(doc, url) result
     *  - function: the result of calling it with (doc, url)
     *  - anything else (including null/undefined): returned unchanged
     */
    this.evaluateThing = function (val, doc, url) {
        if (val === null) {
            return val;
        }
        var valtype = typeof val;
        if (valtype === 'object') {
            if (val instanceof Array) {
                /* keep a reference: `this` is not the scraper inside the
                 * map callback, and nested arrays recurse through it */
                var self = this;
                var retval = val.map(function (i) { return self.evaluateThing(i, doc, url); });
                return flatten(retval);
            } else {
                return val.evaluate(doc, url);
            }
        } else if (valtype === 'function') {
            return val(doc, url);
        } else {
            return val;
        }
    };

    /*
     * makeItems is the function that does the work of making an item.
     * doc: the doc tree for the item
     * url: the url for the item
     * attachments ...
     * eachItem: a function to be called for each item made, with the arguments (doc, url, ...)
     * ret: the function to call when you are done, with no args
     *
     * The base implementation makes no items.
     * NOTE(review): the body was absent from the reviewed text; calling
     * ret() follows from the contract above -- confirm against upstream.
     */
    this.makeItems = function (doc, url, attachments, eachItem, ret) {
        ret();
    };
};
/**
 * Register a scraper: wraps the config in an FW._Scraper and appends it
 * to FW._scrapers.
 *
 * @param {Object} init - scraper configuration, handed to FW._Scraper
 */
FW.Scraper = function (init) {
    FW._scrapers.push(new FW._Scraper(init));
};
/*
 * Constructor behind FW.Scraper: holds the configuration for one item
 * type and builds a Zotero item from a page.
 */
FW._Scraper = function (init) {
/* Copy every property of the init config onto this scraper.
 * NOTE(review): x is never declared, so this loop assigns an implicit global. */
for (x in init) {
this[x] = init[x];
/* Item fields that take a single value; makeItems evaluates each one that is set on this scraper. */
this._singleFieldNames = [
"websiteType" ];
/*
 * Push attachment descriptors onto item["attachments"].
 * config is either an array of configs (each handled recursively) or one
 * object whose url(s), type(s), title(s) and snapshot(s) entries are
 * resolved with evaluateThing.
 */
this._makeAttachments = function(doc, url, config, item) {
if (config instanceof Array) {
config.forEach(function (child) { this._makeAttachments(doc, url, child, item); }, this);
} else if (typeof config === 'object') {
/* accept either the plural or the singular key */
var urlsFilter = config["urls"] || config["url"];
var typesFilter = config["types"] || config["type"];
var titlesFilter = config["titles"] || config["title"];
var snapshotsFilter = config["snapshots"] || config["snapshot"];
var attachUrls = this.evaluateThing(urlsFilter, doc, url);
var attachTitles = this.evaluateThing(titlesFilter, doc, url);
var attachTypes = this.evaluateThing(typesFilter, doc, url);
var attachSnapshots = this.evaluateThing(snapshotsFilter, doc, url);
/* a single URL is treated as a one-element list */
if (!(attachUrls instanceof Array)) {
attachUrls = [attachUrls];
for (var k in attachUrls) {
var attachUrl = attachUrls[k];
var attachType;
var attachTitle;
var attachSnapshot;
/* an array supplies one value per URL; a scalar is shared by all URLs */
if (attachTypes instanceof Array) { attachType = attachTypes[k]; }
else { attachType = attachTypes; }
if (attachTitles instanceof Array) { attachTitle = attachTitles[k]; }
else { attachTitle = attachTitles; }
if (attachSnapshots instanceof Array) { attachSnapshot = attachSnapshots[k]; }
else { attachSnapshot = attachSnapshots; }
item["attachments"].push({ url : attachUrl,
title : attachTitle,
mimeType : attachType,
snapshot : attachSnapshot });
/*
 * Build one Zotero.Item of this.itemType for the page, fill it from the
 * configured fields and attachments, and pass it to eachItem.
 */
this.makeItems = function (doc, url, ignore, eachItem, ret) {
var item = new Zotero.Item(this.itemType);
item.url = url;
for (var i in this._singleFieldNames) {
var field = this._singleFieldNames[i];
if (this[field]) {
var fieldVal = this.evaluateThing(this[field], doc, url);
/* a single-value field keeps only the first result when several were found */
if (fieldVal instanceof Array) {
item[field] = fieldVal[0];
} else {
item[field] = fieldVal;
/* fields that can hold several values */
var multiFields = ["creators", "tags"];
for (var j in multiFields) {
var key = multiFields[j];
var val = this.evaluateThing(this[key], doc, url);
if (val) {
for (var k in val) {
this._makeAttachments(doc, url, this["attachments"], item);
eachItem(item, this, doc, url);
/* instances reach callHook and evaluateThing through an FW._Base prototype */
FW._Scraper.prototype = new FW._Base;
/**
 * Register a multi-item scraper: wraps the config in an FW._MultiScraper
 * and appends it to FW._scrapers.
 *
 * @param {Object} init - scraper configuration, handed to FW._MultiScraper
 */
FW.MultiScraper = function (init) {
    FW._scrapers.push(new FW._MultiScraper(init));
};
/**
 * Constructor behind FW.MultiScraper: collects (title, url) choices from
 * a page, lets the user pick through Zotero.selectItems, and scrapes
 * each chosen document.
 *
 * NOTE(review): the statements that push into items / choiceTitles /
 * choiceUrls, the calls to callback() and ret(), and the early return in
 * makeItems were absent from the reviewed text and are restored from the
 * visible data flow -- confirm against the upstream framework.
 */
FW._MultiScraper = function (init) {
    /* copy every config property onto this scraper */
    for (var x in init) {
        this[x] = init[x];
    }

    /** Build the { url: title } object handed to Zotero.selectItems. */
    this._mkSelectItems = function (titles, urls) {
        var items = {};
        for (var i = 0; i < titles.length; i++) {
            items[urls[i]] = titles[i];
        }
        return items;
    };

    /** Ask the user to choose; callback receives the array of chosen URLs. */
    this._selectItems = function (titles, urls, callback) {
        var items = [];
        Zotero.selectItems(this._mkSelectItems(titles, urls), function (chosen) {
            /* chosen is keyed by URL, so its keys are the selection */
            for (var j in chosen) {
                items.push(j);
            }
            callback(items);
        });
    };

    /** Pair each choice URL with the attachments config at the same index. */
    this._mkAttachments = function (doc, url, urls) {
        var attachmentsArray = this.evaluateThing(this['attachments'], doc, url);
        var attachmentsDict = {};
        if (attachmentsArray) {
            for (var i = 0; i < urls.length; i++) {
                attachmentsDict[urls[i]] = attachmentsArray[i];
            }
        }
        return attachmentsDict;
    };

    /* This logic is very similar to that used by _makeAttachments in
     * a normal scraper, but abstracting it out would not achieve much
     * and would complicate it. */
    /**
     * Append the titles and URLs described by config to choiceTitles and
     * choiceUrls. config is one { title(s), url(s) } object or an array of
     * them.
     */
    this._makeChoices = function (config, doc, url, choiceTitles, choiceUrls) {
        if (config instanceof Array) {
            /* recurse into each child config (the previous code called the
             * non-existent this._makeTitlesUrls here) */
            config.forEach(function (child) { this._makeChoices(child, doc, url, choiceTitles, choiceUrls); }, this);
        } else if (typeof config === 'object') {
            /* accept either the plural or the singular key */
            var urlsFilter = config["urls"] || config["url"];
            var titlesFilter = config["titles"] || config["title"];

            var urls = this.evaluateThing(urlsFilter, doc, url);
            var titles = this.evaluateThing(titlesFilter, doc, url);

            var titlesIsArray = (titles instanceof Array);
            if (!(urls instanceof Array)) {
                urls = [urls];
            }
            for (var k = 0; k < urls.length; k++) {
                var myUrl = urls[k];
                var myTitle;
                /* a scalar title is shared by every URL */
                if (titlesIsArray) { myTitle = titles[k]; }
                else { myTitle = titles; }
                choiceUrls.push(myUrl);
                choiceTitles.push(myTitle);
            }
        }
    };

    /**
     * Offer the page's choices to the user and scrape every chosen
     * document with this.itemTrans, or with whatever FW.getScraper finds
     * for that document when no itemTrans is configured.
     */
    this.makeItems = function (doc, url, ignore, eachItem, ret) {
        if (this.beforeFilter) {
            var newurl = this.beforeFilter(doc, url);
            if (newurl != url) {
                /* start over with the rewritten URL instead of continuing here */
                this.makeItems(doc, newurl, ignore, eachItem, ret);
                return;
            }
        }
        var titles = [];
        var urls = [];
        this._makeChoices(this["choices"], doc, url, titles, urls);
        var attachments = this._mkAttachments(doc, url, urls);

        /* the callbacks below do not run with this scraper as `this` */
        var parentItemTrans = this.itemTrans;
        this._selectItems(titles, urls, function (itemsToUse) {
            if (!itemsToUse) {
                ret();
            } else {
                var cb = function (doc1) {
                    var url1 = doc1.documentURI;
                    var itemTrans = parentItemTrans;
                    if (itemTrans === undefined) {
                        itemTrans = FW.getScraper(doc1, url1);
                    }
                    if (itemTrans === undefined) {
                        /* nothing to do */
                    } else {
                        itemTrans.makeItems(doc1, url1, attachments[url1],
                                            eachItem, function () {});
                    }
                };
                Zotero.Utilities.processDocuments(itemsToUse, cb, ret);
            }
        });
    };
};
/* multi-scrapers reach callHook and evaluateThing through an FW._Base prototype */
FW._MultiScraper.prototype = new FW._Base();
/**
 * Create a delegate translator from the given config.
 * Unlike FW.Scraper, the new object is returned to the caller rather
 * than appended to FW._scrapers.
 *
 * @param {Object} init - configuration, handed to FW._WebDelegateTranslator
 * @returns {FW._WebDelegateTranslator}
 */
FW.WebDelegateTranslator = function (init) {
    return new FW._WebDelegateTranslator(init);
};
/*
 * Constructor behind FW.WebDelegateTranslator: hands the page to a
 * Zotero "web" translator and forwards the items it produces.
 */
FW._WebDelegateTranslator = function (init) {
/* Copy every property of the init config onto this object.
 * NOTE(review): x is never declared, so this loop assigns an implicit global. */
for (x in init) {
this[x] = init[x];
/* Load a "web" translator and pass each item it completes to eachItem. */
this.makeItems = function(doc, url, attachments, eachItem, ret) {
// the handlers below do not run with this object as `this`, so keep a reference for them
var parentThis = this;
var translator = Zotero.loadTranslator("web");
translator.setHandler("itemDone", function(obj, item) {
eachItem(item, parentThis, doc, url);
/* NOTE(review): both branches are empty as shown; presumably a configured
 * translatorId is used directly and otherwise the first translator reported
 * by the "translators" handler is used -- confirm against upstream. */
if (this.translatorId) {
} else {
translator.setHandler("translators", function(obj, translators) {
if (translators.length) {
/* instances reach callHook and evaluateThing through an FW._Base prototype */
FW._WebDelegateTranslator.prototype = new FW._Base;
/**
 * Base constructor for chainable value filters. Each helper appends a
 * filter to this._filters and returns `this`, so calls can be chained;
 * _applyFilters then runs the chain over an array of values.
 */
FW._StringMagic = function () {
    this._filters = [];

    /**
     * Append a filter, called as filter(value, doc). It may return a
     * value, an array of values, or undefined/null to drop the value.
     */
    this.addFilter = function (filter) {
        this._filters.push(filter);
        return this;
    };

    /** Split each string on re, discarding empty pieces. */
    this.split = function (re) {
        return this.addFilter(function (s) {
            return s.split(re).filter(function (e) { return (e !== ""); });
        });
    };

    /**
     * Replace s1 with s2 in each string.
     * NOTE(review): a third argument to String.prototype.replace is
     * non-standard and ignored by standards-compliant engines; confirm
     * whether any caller relies on flags.
     */
    this.replace = function (s1, s2, flags) {
        return this.addFilter(function (s) {
            if (s.match(s1)) {
                return s.replace(s1, s2, flags);
            } else {
                return s;
            }
        });
    };

    /** Put prefix in front of each string. */
    this.prepend = function (prefix) {
        return this.replace(/^/, prefix);
    };

    /** Put postfix after each string. */
    this.append = function (postfix) {
        return this.replace(/$/, postfix);
    };

    /** Delete what toStrip matches from each string. */
    this.remove = function (toStrip, flags) {
        return this.replace(toStrip, '', flags);
    };

    this.trim = function () {
        return this.addFilter(function (s) { return Zotero.Utilities.trim(s); });
    };

    this.trimInternal = function () {
        return this.addFilter(function (s) { return Zotero.Utilities.trimInternal(s); });
    };

    /**
     * Keep capture group `group` (default 0, the whole match) of re;
     * values that do not match are dropped.
     */
    this.match = function (re, group) {
        var index = group ? group : 0;
        return this.addFilter(function (s) {
            var m = s.match(re);
            if (m === undefined || m === null) { return undefined; }
            else { return m[index]; }
        });
    };

    this.cleanAuthor = function (type, useComma) {
        return this.addFilter(function (s) { return Zotero.Utilities.cleanAuthor(s, type, useComma); });
    };

    /** Replace each value with its property named field. */
    this.key = function (field) {
        return this.addFilter(function (n) { return n[field]; });
    };

    this.capitalizeTitle = function () {
        return this.addFilter(function (s) { return Zotero.Utilities.capitalizeTitle(s); });
    };

    this.unescapeHTML = function () {
        return this.addFilter(function (s) { return Zotero.Utilities.unescapeHTML(s); });
    };

    this.unescape = function () {
        return this.addFilter(function (s) { return unescape(s); });
    };

    /**
     * Run every filter, in order, over the values in a.
     * Before and after each filter the list is stripped of
     * undefined/null entries; a filter that throws drops that one value
     * and logs through Zotero.debug.
     *
     * @param {Array} a - input values (may be nested)
     * @param {*} doc1 - passed to each filter as its second argument
     * @returns {Array} the flattened results
     */
    this._applyFilters = function (a, doc1) {
        var isPresent = function (x) { return ((x !== undefined) && (x !== null)); };
        for (var i = 0; i < this._filters.length; i++) {
            var filter = this._filters[i];
            /* the previous filter may have returned arrays or nothing */
            a = flatten(a).filter(isPresent);
            for (var j = 0; j < a.length; j++) {
                try {
                    a[j] = filter(a[j], doc1);
                } catch (e) {
                    a[j] = undefined;
                    Zotero.debug("Caught exception " + e + " on filter: " + filter);
                }
            }
            /* need this twice because they could have become undefined or null along the way */
            a = a.filter(isPresent);
        }
        return flatten(a);
    };
};
/**
 * Factory: create a filter chain that starts from the page's HTML.
 *
 * @returns {FW._PageText}
 */
FW.PageText = function () {
    return new FW._PageText();
};
/**
 * Filter chain whose input is doc.documentElement.innerHTML.
 */
FW._PageText = function () {
    /* own list, so chains do not share filters through the prototype */
    this._filters = [];

    /**
     * Run the filters over the page HTML.
     *
     * @param {Document} doc
     * @returns {Array|false} the filtered values, or false when none remain
     */
    this.evaluate = function (doc) {
        var a = [doc.documentElement.innerHTML];
        a = this._applyFilters(a, doc);
        if (a.length == 0) { return false; }
        else { return a; }
    };
};
/* page-text chains take their filter helpers from an FW._StringMagic prototype */
FW._PageText.prototype = new FW._StringMagic;
/**
 * Factory: create a filter chain that starts from the page URL.
 *
 * @returns {FW._Url}
 */
FW.Url = function () {
    var urlChain = new FW._Url();
    return urlChain;
};
/**
 * Filter chain whose input is the URL passed to evaluate.
 */
FW._Url = function () {
    /* own list, so chains do not share filters through the prototype */
    this._filters = [];

    /**
     * Run the filters over the URL.
     *
     * @param {Document} doc - passed through to the filters
     * @param {string} url
     * @returns {Array|false} the filtered values, or false when none remain
     */
    this.evaluate = function (doc, url) {
        var a = [url];
        a = this._applyFilters(a, doc);
        if (a.length == 0) { return false; }
        else { return a; }
    };
};
/* URL chains take their filter helpers from an FW._StringMagic prototype */
FW._Url.prototype = new FW._StringMagic;
/**
 * Factory: create a filter chain that starts from an XPath query.
 *
 * @param {string} xpathExpr - the XPath expression to evaluate on the page
 * @returns {FW._Xpath}
 */
FW.Xpath = function (xpathExpr) {
    var xpathChain = new FW._Xpath(xpathExpr);
    return xpathChain;
};
/*
 * Filter chain whose input is the result of evaluating an XPath
 * expression against the document.
 */
FW._Xpath = function (_xpath) {
this._xpath = _xpath;
this._filters = new Array();
/*
 * Filter that maps an object with a non-empty textContent to that text
 * and leaves any other value unchanged.
 * NOTE(review): as shown, filter is built but never registered before
 * `this` is returned -- confirm against upstream.
 */
this.text = function() {
var filter = function(n) {
if (typeof n === 'object' && n.textContent) { return n.textContent; }
else { return n; }
return this;
/*
 * Filter that evaluates a second XPath relative to each value and keeps
 * the first node of the result.
 * NOTE(review): as shown, filter is built but never registered before
 * `this` is returned -- confirm against upstream.
 */
this.sub = function(xpath) {
var filter = function(n, doc) {
var result = doc.evaluate(xpath, n, null, XPathResult.ANY_TYPE, null);
if (result) {
return result.iterateNext();
} else {
return undefined;
return this;
/*
 * Evaluate this._xpath on doc, run the filters over the collected
 * values, and return them, or false when nothing remains.
 */
this.evaluate = function (doc) {
var res = doc.evaluate(this._xpath, doc, null, XPathResult.ANY_TYPE, null);
var resultType = res.resultType;
var a = new Array();
/* NOTE(review): the string, boolean and number branches are empty as
 * shown, so such results add nothing to a, and the last condition is cut
 * off after `||` -- confirm against upstream. */
if (resultType == XPathResult.STRING_TYPE) {
} else if (resultType == XPathResult.BOOLEAN_TYPE) {
} else if (resultType == XPathResult.NUMBER_TYPE) {
} else if (resultType == XPathResult.ORDERED_NODE_ITERATOR_TYPE ||
var x;
/* drain the iterator: iterateNext() yields a falsy value when exhausted */
while ((x = res.iterateNext())) { a.push(x); }
a = this._applyFilters(a, doc);
if (a.length == 0) { return false; }
else { return a; }
/* XPath chains take their filter helpers from an FW._StringMagic prototype */
FW._Xpath.prototype = new FW._StringMagic();
/**
 * Find the first registered scraper whose `detect` value matches the
 * page and report its item type.
 *
 * A scraper matches when its evaluated `detect` has a length greater
 * than zero and a truthy first element. A scraper with no `detect`, or
 * whose `detect` evaluates to false/undefined/null, never matches.
 *
 * @param {Document} doc
 * @param {string} url
 * @returns {*} the matching scraper's evaluated itemType, or undefined
 */
FW.detectWeb = function (doc, url) {
    for (var i = 0; i < FW._scrapers.length; i++) {
        var scraper = FW._scrapers[i];
        var itemType = scraper.evaluateThing(scraper['itemType'], doc, url);
        var v = scraper.evaluateThing(scraper['detect'], doc, url);
        /* guard first: v is undefined when the scraper defines no detect */
        if (v && v.length > 0 && v[0]) {
            return itemType;
        }
    }
    return undefined;
};
/**
 * Return the first registered scraper whose item type equals what
 * FW.detectWeb reports for the page and whose `detect` evaluates truthy.
 *
 * NOTE(review): the `[0]` selection was absent from the reviewed text; it
 * is restored because callers use the result as a single scraper and
 * compare it with undefined -- confirm against upstream.
 *
 * @param {Document} doc
 * @param {string} url
 * @returns {Object|undefined} the scraper, or undefined when none applies
 */
FW.getScraper = function (doc, url) {
    var itemType = FW.detectWeb(doc, url);
    return FW._scrapers.filter(function (s) {
        return (s.evaluateThing(s['itemType'], doc, url) == itemType)
            && (s.evaluateThing(s['detect'], doc, url));
    })[0];
};
/*
 * Framework entry point for saving: look up the scraper for this page
 * with FW.getScraper and have it make items.
 */
FW.doWeb = function (doc, url) {
var scraper = FW.getScraper(doc, url);
scraper.makeItems(doc, url, [],
/* per-item callback: run the scraper's 'scraperDone' hook, then give an item without a title the empty string */
function(item, scraper, doc, url) {
scraper.callHook('scraperDone', item, doc, url);
if (!item['title']) {
item['title'] = "";
/* completion callback, passed as makeItems' final `ret` argument */
function() {
/*********************** END FRAMEWORK ***********************/
/*
    Financial Times Translator
    Copyright © 2011 Sebastian Karcher and CHNM

    This file is part of Zotero.

    Zotero is free software: you can redistribute it and/or modify
    it under the terms of the GNU Affero General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    Zotero is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    GNU Affero General Public License for more details.

    You should have received a copy of the GNU Affero General Public License
    along with Zotero. If not, see <http://www.gnu.org/licenses/>.
*/
/**
 * Translator entry point for detection; delegates to the framework.
 *
 * @param {Document} doc
 * @param {string} url
 * @returns {*} whatever FW.detectWeb returns for this page
 */
function detectWeb(doc, url) {
    var detected = FW.detectWeb(doc, url);
    return detected;
}
/**
 * Translator entry point for saving; delegates to the framework.
 *
 * @param {Document} doc
 * @param {string} url
 * @returns {*} whatever FW.doWeb returns
 */
function doWeb(doc, url) {
    var outcome = FW.doWeb(doc, url);
    return outcome;
}
/** Blog */
/* NOTE(review): the FW.Scraper({ ... }) wrapper and the attachment
 * object's braces were absent from the reviewed text and are restored
 * here -- confirm against upstream. */
FW.Scraper({
    itemType : 'blogPost',
    /* a page counts as a blog post when it has an entry-title heading */
    detect : FW.Xpath('//h2[@class="entry-title"]'),
    title : FW.Xpath('//h2[@class="entry-title"]').text().trim(),
    attachments : [
        {
            url: FW.Url(),
            title: "Financial Times Snapshot",
            type: "text/html"
        }
    ],
    /* strip a leading "by" from the byline link text */
    creators : FW.Xpath('//span[@class="author_byline"]/a').text().replace(/^\s*by\s*/, "").cleanAuthor("author"),
    date : FW.Xpath('//span[contains(@class, "entry-date")]').text(),
    ISSN : "0307-1766",
    publicationTitle : "Financial Times"
});
/** Articles */
/* NOTE(review): the FW.Scraper({ ... }) wrapper and the attachment
 * object's braces were absent from the reviewed text and are restored
 * here -- confirm against upstream. */
FW.Scraper({
    itemType : 'newspaperArticle',
    /* og:type "article" plus a headline inside the full-story container */
    detect : FW.Xpath('//meta[@property="og:type" and @content="article" and //div[contains(@class, "fullstory")]//h1]'),
    title : FW.Xpath('//div[contains(@class, "fullstory")]//h1').text().trim(),
    attachments : [
        {
            url: FW.Url(),
            title: "Financial Times Snapshot",
            type: "text/html"
        }
    ],
    /* drop the leading "By" and a trailing " in <place>", then split the names */
    creators : FW.Xpath('//p[contains(@class, "byline")]/span').text().replace(/^By\s*/, "").remove(/ in .+/).split(/,| and /).cleanAuthor("author"),
    date : FW.Xpath('//p[@id="publicationDate"]/span[@class="time"]').text(),
    abstractNote : FW.Xpath('//meta[@name="description"]/@content').text(),
    ISSN : "0307-1766",
    issue : FW.Xpath('//div[contains(@class, "article-issue")]//div/a').text().trim(),
    publicationTitle : "Financial Times"
});
/** Search results */
/* NOTE(review): the FW.MultiScraper({ ... }) wrapper and the closing
 * braces were absent from the reviewed text and are restored here --
 * confirm against upstream. */
FW.MultiScraper({
    itemType : 'multiple',
    /* applies to URLs containing "/search?" */
    detect : FW.Url().match(/\/search\?/),
    choices : {
        /* titles and urls come from the same result links, so they line up by index */
        titles : FW.Xpath('//li[contains(@class, "result")]/h3/a[contains(@href, "www.ft.com")]').text().trim(),
        urls : FW.Xpath('//li[contains(@class, "result")]/h3/a[contains(@href, "www.ft.com")]').key("href")
    }
});
var testCases = [
"type": "web",
"url": "http://blogs.ft.com/beyond-brics/2012/01/02/12-for-2012-brazils-import-substitution-2-0/?Authorised=false#axzz1iLZdoFBr",
"items": [
"itemType": "blogPost",
"title": "12 for 2012: Brazil’s import substitution industrialisation 2.0",
"creators": [],
"date": "Jan 02 2012 15:00",
"blogTitle": "Financial Times",
"shortTitle": "12 for 2012",
"url": "http://blogs.ft.com/beyond-brics/2012/01/02/12-for-2012-brazils-import-substitution-2-0/?Authorised=false#axzz1iLZdoFBr",
"attachments": [
"title": "Financial Times Snapshot",
"mimeType": "text/html"
"tags": [],
"notes": [],
"seeAlso": []
"type": "web",
"url": "http://www.ft.com/intl/cms/s/2/0d506e0e-1583-11e1-b9b8-00144feabdc0.html#axzz1hzl2SwPD",
"items": [
"itemType": "newspaperArticle",
"title": "Inside McKinsey",
"creators": [
"firstName": "Andrew",
"lastName": "Hill",
"creatorType": "author"
"date": "November 25, 2011 9:32 pm",
"ISSN": "0307-1766",
"abstractNote": "When 1,200 partners of McKinsey&Company – the elite of global consulting – arrived at the Gaylord National Hotel & Convention Center, outside Washington DC, early on the morning of March 15 this year, they found themselves where they least wanted to",
"libraryCatalog": "Financial Times",
"publicationTitle": "Financial Times",
"url": "http://www.ft.com/intl/cms/s/2/0d506e0e-1583-11e1-b9b8-00144feabdc0.html#axzz1hzl2SwPD",
"attachments": [
"title": "Financial Times Snapshot",
"mimeType": "text/html"
"tags": [],
"notes": [],
"seeAlso": []
"type": "web",
"url": "http://www.ft.com/intl/cms/s/30c4c46e-35e2-11e1-9f98-00144feabdc0,Authorised=false.html?_i_location=http%3A%2F%2Fwww.ft.com%2Fcms%2Fs%2F0%2F30c4c46e-35e2-11e1-9f98-00144feabdc0.html&_i_referer=http%3A%2F%2Fsearch.ft.com%2Fsearch%3FqueryText%3Dargentina%26ftsearchType%3Dtype_news#axzz1iRbmkQzE",
"items": [
"itemType": "newspaperArticle",
"title": "China and France chase US shale assets",
"creators": [
"firstName": "Ed",
"lastName": "Crooks",
"creatorType": "author"
"firstName": "James",
"lastName": "Boxell",
"creatorType": "author"
"firstName": "Adam",
"lastName": "Jones",
"creatorType": "author"
"date": "January 3, 2012 7:30 pm",
"ISSN": "0307-1766",
"abstractNote": "Chinese and French companies have announced large investments in US shale oil and gas projects as they seek to benefit from the country’s controversial boom in “unconventional” resources. Sinopec, China’s second-largest oil company by market",
"libraryCatalog": "Financial Times",
"publicationTitle": "Financial Times",
"url": "http://www.ft.com/intl/cms/s/30c4c46e-35e2-11e1-9f98-00144feabdc0,Authorised=false.html?_i_location=http%3A%2F%2Fwww.ft.com%2Fcms%2Fs%2F0%2F30c4c46e-35e2-11e1-9f98-00144feabdc0.html&_i_referer=http%3A%2F%2Fsearch.ft.com%2Fsearch%3FqueryText%3Dargentina%26ftsearchType%3Dtype_news#axzz1iRbmkQzE",
"attachments": [
"title": "Financial Times Snapshot",
"mimeType": "text/html"
"tags": [],
"notes": [],
"seeAlso": []
"type": "web",
"url": "http://search.ft.com/search?queryText=argentina&ftsearchType=on",
"items": "multiple"