302 lines
9.2 KiB
JavaScript
302 lines
9.2 KiB
JavaScript
{
|
|
"translatorID": "7bdb79e-a47f-4e3d-b317-ccd5a0a74456",
|
|
"label": "Factiva",
|
|
"creator": "Philipp Zumstein and Aurimas Vinckevicius",
|
|
"target": "^https?://(global\\.factiva\\.com|[^/]*\\bglobal-factiva-com\\b[^/]+)/([gh]a|redir|np)/default\\.aspx",
|
|
"minVersion": "4.0",
|
|
"maxVersion": "",
|
|
"priority": 100,
|
|
"inRepository": true,
|
|
"translatorType": 4,
|
|
"browserSupport": "gcsv",
|
|
"lastUpdated": "2015-02-13 21:54:59"
|
|
}
|
|
|
|
/*
|
|
***** BEGIN LICENSE BLOCK *****
|
|
|
|
Factiva Translator, Copyright © 2014 Philipp Zumstein
|
|
This file is part of Zotero.
|
|
|
|
Zotero is free software: you can redistribute it and/or modify
|
|
it under the terms of the GNU Affero General Public License as published by
|
|
the Free Software Foundation, either version 3 of the License, or
|
|
(at your option) any later version.
|
|
|
|
Zotero is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
GNU Affero General Public License for more details.
|
|
|
|
You should have received a copy of the GNU Affero General Public License
|
|
along with Zotero. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
***** END LICENSE BLOCK *****
|
|
*/
|
|
|
|
|
|
/**
 * Zotero detection hook: decide what kind of page is being shown.
 *
 * @param {Document} doc - the page document
 * @param {String} url - the page URL (not consulted here)
 * @return {String|undefined} "newspaperArticle" when the body carries the
 *   'articleView' class, "multiple" when getSearchResults() finds at least
 *   one selectable headline, otherwise undefined
 */
function detectWeb(doc, url) {
	var body = doc.body;
	if (body.classList.contains('articleView')) {
		// Watching the body class is not sufficient for multiples, because
		// the class does not change when filtering results
		Z.monitorDOMChanges(body, {attributes: true, attributeFilter: ['class']});
		return "newspaperArticle";
	}

	// Re-run detection when the style attribute of the splitter element changes
	var splitter = doc.getElementById('hldSplitter');
	if (splitter) {
		Z.monitorDOMChanges(splitter, {attributes: true, attributeFilter: ['style']});
	}

	if (!getSearchResults(doc, true)) return;
	return "multiple";
}
|
|
|
|
/**
 * Collect the selectable headlines from the element with id 'headlines'.
 *
 * A table row counts as a result only when it contains both an anchor (its
 * text becomes the label) and an input (its value becomes the key).
 *
 * @param {Document} doc - the page document
 * @param {Boolean} [checkOnly] - when truthy, return true as soon as the
 *   first usable row is found instead of building the full map
 * @return {Object|Boolean} map of input value -> trimmed anchor text,
 *   true in checkOnly mode, or false when nothing usable was found
 */
function getSearchResults(doc, checkOnly) {
	var headlines = doc.getElementById('headlines');
	if (!headlines) return false;

	var items = {}, found = false;
	var rows = headlines.getElementsByTagName('tr');
	for (var i = 0; i < rows.length; i++) {
		var title = rows[i].getElementsByTagName('a')[0];
		if (!title) continue;

		var hdl = rows[i].getElementsByTagName('input')[0];
		if (!hdl) continue;

		if (checkOnly) return true;
		found = true;

		// The previously computed row counter and anchor link were never
		// used, so they are no longer read from the DOM.
		items[hdl.value] = ZU.trimInternal(title.textContent);
	}

	return found ? items : false;
}
|
|
|
|
/**
 * Zotero save hook.
 *
 * On a "multiple" page the user is asked to pick headlines and the chosen
 * keys are scraped; otherwise the single handle is read from the element
 * with id '_hdl'.
 *
 * @param {Document} doc - the page document
 * @param {String} url - the page URL (only forwarded to detectWeb)
 * @throws {Error} when a non-multiple page has no '_hdl' element
 */
function doWeb(doc, url) {
	if (detectWeb(doc, url) == "multiple") {
		Zotero.selectItems(getSearchResults(doc), function (items) {
			if (!items) return true;

			// Own keys only: for...in would also pick up inherited
			// enumerable properties of the selection object.
			scrape(doc, Object.keys(items));
		});
	} else {
		var hdl = doc.getElementById('_hdl');
		if (!hdl) throw new Error('Could not locate hdl');
		scrape(doc, [hdl.value]);
	}
}
|
|
|
|
/**
 * Gather form values. Very closely follows behavior of FACTIVA itself.
 *
 * Reads a fixed list of controls from the form named 'PageBaseForm' and a
 * fixed list of elements by id, and returns them as "name=value" strings.
 * Only '+', '=' (and '&' for 'hls') are percent-escaped, and only for the
 * fields handled explicitly below; every other value is passed through
 * unchanged.
 *
 * @param {Document} doc - the page document
 * @return {String[]} "name=value" pairs, form fields first, then id fields
 * @throws {Error} when the 'PageBaseForm' form is missing
 */
function getPostParams(doc) {
	var form = doc.forms.namedItem('PageBaseForm');
	if (!form) throw new Error('Could not find PageBaseForm');

	var params = [],
		fetchFromForm = ['_XFORMSESSSTATE', 'hls', 'elks', 'istphst', 'sri', 'usageAggregator'],
		fetchById = ['ao', 'aod', 'iisac', 'ipfCtrl', 'hideahdr'],
		name, input, value, i;

	for (i = 0; i < fetchFromForm.length; i++) {
		name = fetchFromForm[i];
		input = form.elements.namedItem(name);
		if (!input) continue;

		value = input.value;

		if (name == '_XFORMSESSSTATE') {
			value = value.replace(/\+/g, "%2b").replace(/\=/g, "%3d");
		} else if (name == 'usageAggregator') {
			// Sent under a different parameter name
			name = 'fdn';
		} else if (name == 'hls') {
			value = value.replace(/\+/g, "%2b").replace(/\=/g, "%3d").replace(/&/g, "%26");
		}

		params.push(name + '=' + value);
	}

	for (i = 0; i < fetchById.length; i++) {
		name = fetchById[i];
		input = doc.getElementById(name);

		if (name == 'iisac') {
			// Always sent; defaults to 0 when the element is absent
			value = input ? input.value : 0;
		} else if (!input) {
			continue;
		} else if (name == 'ipfCtrl') {
			name = 'ipf';
			value = input.getAttribute('value'); // Not actually inputs
		} else {
			value = input.value;
		}

		params.push(name + '=' + value);
	}

	return params;
}
|
|
|
|
/**
 * Build the POST bodies used to request article metadata.
 *
 * The first query carries exactly one handle (with enableAd=true); every
 * following query carries up to 14 handles. 'arc' is the total number of
 * handles and 'ari' the 1-based position of the first handle in the batch.
 *
 * @param {String[]} baseParams - "name=value" pairs prepended to every query
 * @param {String[]} hdls - article handles; left unmodified
 * @return {String[]} one POST body per batch, in order
 */
function buildQueries(baseParams, hdls) {
	// Work on a copy so the caller's array is not drained by splice()
	var pending = hdls.slice(),
		arc = pending.length,
		ari = 1,
		baseStr = baseParams.join('&') + (baseParams.length ? '&' : ''),
		queries = [],
		hdlSet;

	while (pending.length) {
		hdlSet = pending.splice(0, ari == 1 ? 1 : 14);
		queries.push(
			baseStr
			// NOTE(review): escape() is a legacy encoder; kept because the
			// request format is meant to mirror the site's own requests.
			+ 'hdl=[' + escape(hdlSet.join(',')) + ']'
			+ '&enableAd=' + (ari == 1)
			+ '&arc=' + arc + '&ari=' + ari
			+ '&dfd=FULR'
		);
		ari += hdlSet.length;
	}
	return queries;
}
|
|
|
|
/**
 * Fetch and save the articles identified by the given handles.
 *
 * Combines the page's form parameters with the handles into POST bodies and
 * hands them to fetchQueries() for the '/ha/haservice.aspx' endpoint.
 *
 * @param {Document} doc - the page document
 * @param {String[]} hdls - article handles to request
 */
function scrape(doc, hdls) {
	var postParams = getPostParams(doc);
	var postBodies = buildQueries(postParams, hdls);
	var requestHeaders = {
		'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8'
	};
	fetchQueries('/ha/haservice.aspx', postBodies, requestHeaders, doc);
}
|
|
|
|
/**
 * POST the queries one after another and scrape each response.
 *
 * The next query is only sent from inside the previous response callback.
 * Each response text is parsed into a detached <div> and its elements with
 * class 'article' are passed to scrapeArticles().
 *
 * @param {String} url - endpoint to POST to
 * @param {String[]} queries - POST bodies, sent in order; left unmodified
 * @param {Object} headers - request headers passed to ZU.doPost
 * @param {Document} doc - document used to create the parsing container
 */
function fetchQueries(url, queries, headers, doc) {
	if (!queries.length) return;

	// Walk the list by index instead of shift()ing the caller's array
	function postNext(position) {
		ZU.doPost(url, queries[position], function (text) {
			var div = doc.createElement('div');
			div.innerHTML = text;

			var articles = div.getElementsByClassName('article');
			if (!articles.length) {
				Z.debug('Could not locate metadata');
				Z.debug(text);
			}

			scrapeArticles(articles);
			if (position + 1 < queries.length) postNext(position + 1);
		}, headers);
	}

	postNext(0);
}
|
|
|
|
// Lower-cased month names -> two-digit month number.
// Order per month: German, English, French, Italian, Spanish (no duplicates)
var FACTIVA_MONTHS = {
	"januar": "01", "january": "01", "janvier": "01", "gennaio": "01", "enero": "01",
	"februar": "02", "february": "02", "février": "02", "febbraio": "02", "febrero": "02",
	"märz": "03", "march": "03", "mars": "03", "marzo": "03",
	"april": "04", "avril": "04", "aprile": "04", "abril": "04",
	"mai": "05", "may": "05", "maggio": "05", "mayo": "05",
	"juni": "06", "june": "06", "juin": "06", "giugno": "06", "junio": "06",
	"juli": "07", "july": "07", "juillet": "07", "luglio": "07", "julio": "07",
	"august": "08", "août": "08", "agosto": "08",
	"september": "09", "septembre": "09", "settembre": "09", "septiembre": "09",
	"oktober": "10", "october": "10", "octobre": "10", "ottobre": "10", "octubre": "10",
	"november": "11", "novembre": "11", "noviembre": "11",
	"dezember": "12", "december": "12", "décembre": "12", "dicembre": "12", "diciembre": "12"
};

/**
 * Convert a "day month year" publication date into YYYY-MM-DD.
 *
 * @param {String} pd - date text, e.g. "8 December 2013", "8. Dezember 2013"
 *   or "8 de diciembre de 2013"
 * @return {String} the ISO-style date, or pd unchanged when the text does
 *   not have a day / known-or-numeric month / year layout
 */
function parseFactivaDate(pd) {
	var parts = pd.split(/ |\. ?/);
	if (parts.length == 5) { // in Spanish, e.g. [8, de, diciembre, de, 2013]
		parts = [parts[0], parts[2], parts[4]];
	}
	if (parts.length != 3) return pd;

	var day = parts[0], month = parts[1], year = parts[2];
	if (!/^\d{1,2}$/.test(day) || !/^\d+$/.test(year)) return pd;

	var monthKey = month.toLowerCase();
	if (Object.prototype.hasOwnProperty.call(FACTIVA_MONTHS, monthKey)) {
		month = FACTIVA_MONTHS[monthKey];
	} else if (/^\d{1,2}$/.test(month)) {
		if (month.length == 1) month = "0" + month;
	} else {
		// Unknown month name: keep the original text rather than emit a
		// malformed pseudo-ISO string
		return pd;
	}

	if (day.length == 1) day = "0" + day;
	return year + "-" + month + "-" + day;
}

/**
 * Extract author names from the AU field, or failing that from the BY field.
 *
 * @param {Object} element - field code -> text map for one article
 * @return {String[]} non-blank author name strings (possibly empty)
 */
function factivaAuthorNames(element) {
	var names = [];
	if (element["AU"]) {
		names = element["AU"].split(",");
	} else if (element["BY"]) {
		// Only strip a leading "By"; an unanchored match would also eat the
		// "by" inside names such as "Abby" or "Byron".
		var byline = ZU.trimInternal(element["BY"].replace(/^\s*by\b[:\s]*/i, ""));
		names = byline.split(/(?:\&| and |,| et )/i);
	}
	// Adjacent separators (", and ") leave empty pieces behind
	return names.filter(function (name) {
		return name.trim();
	});
}

/**
 * Turn article elements into Zotero newspaperArticle items and complete them.
 *
 * Each article is read as a table of two-cell rows: one cell holds a field
 * code (HD, SN, PD, ...) and the other its value. Elements whose id does not
 * start with 'article-' are skipped.
 *
 * @param {Element[]|HTMLCollection} articles - elements with class 'article'
 */
function scrapeArticles(articles) {
	for (var i = 0; i < articles.length; i++) {
		if (articles[i].id.indexOf('article-') != 0) continue; // nested div

		var rows = articles[i].getElementsByTagName('tr');
		var element = {};
		for (var j = 0; j < rows.length; j++) {
			var data = rows[j].getElementsByTagName('td');
			if (data.length != 2) continue;

			var index, value;
			if (data[0].classList.contains('index')) {
				index = data[0];
				value = data[1];
			} else {
				// mirrored layout with the field code in the second cell
				// (presumably right-to-left languages; to be confirmed)
				index = data[1];
				value = data[0];
			}

			index = index.textContent.trim();
			// TD keeps the cell itself so its markup can be used for the note
			if (index != 'TD') value = ZU.trimInternal(value.textContent);
			element[index] = value;
		}

		var newItem = new Zotero.Item("newspaperArticle");

		newItem.title = element["HD"];
		newItem.publicationTitle = element["SN"];
		newItem.section = element["SE"];

		if (element["PD"]) {
			newItem.date = parseFactivaDate(element["PD"]);
		}

		newItem.edition = element["ED"];
		newItem.abstractNote = element["LP"];
		newItem.pages = element["PG"];
		newItem.publisher = element["PUB"];
		newItem.language = element["LA"];
		newItem.volume = element["VOL"];
		newItem.rights = element["CY"];

		// Eventually replace this with PDF of the "Full Article" view
		if (element['TD']) {
			var html = element['TD'].innerHTML
				.replace(/<\/?b>/g, '')
				.replace(/<\/?a[^>]*>/g, '');
			newItem.notes.push({note: ZU.trimInternal(html)});
		}

		var authors = factivaAuthorNames(element);
		for (var k = 0; k < authors.length; k++) {
			newItem.creators.push(ZU.cleanAuthor(authors[k], "author"));
		}

		//company: element["CO"] --> seems fine as tags
		//industry: element["IN"] --> broad but still okay
		//element["NS"] --> too messy
		//regions: element["RE"] --> too broad, messy
		var tagString = element["CO"];
		if (!tagString) {
			tagString = element["IN"];
		} else if (element["IN"]) {
			tagString += " | " + element["IN"];
		}
		if (tagString) {
			var tagArray = tagString.split("|");
			for (var t = 0; t < tagArray.length; t++) {
				// Entries look like "code : name"; fall back to the whole
				// entry when there is no colon instead of passing undefined on
				var tagCodeNamePair = tagArray[t].split(":");
				var tag = ZU.trimInternal(
					tagCodeNamePair.length > 1 ? tagCodeNamePair[1] : tagCodeNamePair[0]
				);
				if (tag) newItem.tags.push(tag);
			}
		}

		if (element["AN"]) {
			// "<label> <accession number>": take the last token so a value
			// without a label does not produce "an=undefined"
			var anParts = element["AN"].split(" ");
			var accessionNumber = anParts[anParts.length - 1];
			newItem.url = 'http://global.factiva.com/redir/default.aspx?P=sa&an='
				+ encodeURIComponent(accessionNumber) + '&cat=a&ep=ASE';
		}

		newItem.complete();
	}
}