w_SearchByLabel(string, mode='inlabel', langs='', langsorder='', instanceof='', Pproperty='', debug=FALSE)
w_SearchByOccupation(Qoc, mode=c('default','count','wikipedias'), langsorder='', wikilangs='', nlimit=10000, debug=FALSE)
w_isInstanceOf(entity_list, instanceof='', nlimit=50000, debug=FALSE)
w_Wikipedias(entity_list, wikilangs='', instanceof='', nlimit=1500, debug=FALSE)
w_isValid(entity_list, nlimit=50000, debug=FALSE)
w_Property(entity_list, Pproperty, includeQ=FALSE, langsorder=‘en’, nlimit=10000, debug=FALSE)
w_SearchByAuthority(Pauthority, langsorder='', instanceof='', nlimit=10000, debug=FALSE)
Pauthority = Authority Database Property in Wikidata
w_EntityInfo(entity_list, mode='default', langsorder='', wikilangs='', nlimit=MW_LIMIT, debug=FALSE)
m_Opensearch(string, project=‘en.wikipedia.org’, profile=“engine_autoselect”, redirects=“resolve”)
m_reqMediaWiki(titles, mode=c(‘wikidataEntity’,‘redirects’,‘pagePrimaryImage’,‘pageFiles’), project=‘en.wikipedia.org’, redirects=TRUE, exclude_ext=‘svg|webp|xcf’)
m_Pageviews(article, start, end, project=“en.wikipedia.org”, access=“all-access”, agent=“user”, granularity=“monthly”, redirects=FALSE)
m_XtoolsInfo(article, infotype=c(“articleinfo”, “prose”, “links”), project=“en.wikipedia.org”, redirects=FALSE)
v_AutoSuggest(author) : obtains viafID
v_Search(CQL_Query, mode=c(‘default’, ‘anyField’, ‘allmainHeadingEl’, ‘allNames’, ‘allPersonalNames’, ‘allTitle’), schema=c(‘brief’, ‘JSON’)) : obtains clusters records
v_GetRecord(viafid, record_format=‘viaf.json’): retrieve a cluster record
v_Extract(viaf, info, source=NULL)
To install and load the updated version of the wikiTools package simply run the following commands:
Exact search in Label or exact search in AltLabel (case- and diacritic-sensitive)
Optional: limit by instanceof Wikidata class (Qxx).
Optional: return information of some properties (Pproperties, Pxxx).
df <- w_SearchByLabel(string='Iranzo', langsorder='es|en')
df <- w_SearchByLabel(string='Iranzo', langsorder='es|en', instanceof = 'Q5')
df <- w_SearchByLabel(string='Iranzo', langsorder='es|en', instanceof = 'Q5|Q101352')
df <- w_SearchByLabel(string='Iranzo', langsorder='es|en', instanceof = 'Q5',
Pproperty = 'P21|P569|P570')
df <- w_SearchByLabel(string='Iranzo', lang='en', langsorder='es|en', mode='startswith')
df <- w_SearchByLabel(string='Iranzo', lang='en', langsorder='es|en', instanceof = 'Q5',
df <- w_SearchByLabel(string='Iranzo', lang='en', langsorder='es|en',
instanceof = 'Q5|Q101352', mode='startswith')
df <- w_SearchByLabel(string='Iranzo', lang='en', langsorder='en', instanceof = 'Q5',
Pproperty = 'P21|P569|P570', mode='startswith')
If lang=='', the search is performed in any language; otherwise it is performed only in the language indicated.
Search only in Chinese (Simplified) (language code: zh):
Optional instanceof and Property
df <- w_SearchByLabel(string='Iranzo', langsorder='es|en', instanceof = 'Q5',
df <- w_SearchByLabel(string='Iranzo', langsorder='es|en', instanceof = 'Q5|Q101352',
df <- w_SearchByLabel(string='Iranzo', langsorder='es|en', instanceof = 'Q5',
Pproperty = 'P21|P569|P570', mode='inlabel')
later. Check if elements in entity_list are instances of a Wikimedia class
## entity instanceof instanceof_Q5
## Q6058550 Q6058550 Q16560|Q133215 FALSE
## Q11912738 Q11912738 Q3947 FALSE
## Q31835108 Q31835108 Q24529780 FALSE
## Q45976259 Q45976259 Q101352 FALSE
## Q45987474 Q45987474 Q4167410 FALSE
## Q47034606 Q47034606 Q1642895 FALSE
## Q67289998 Q67289998 Q38720 FALSE
## Q83296470 Q83296470 FALSE
## Q85684513 Q85684513 Q28564|Q12317349 FALSE
## Q97101007 Q97101007 Q245117 FALSE
## Q97101009 Q97101009 Q245117 FALSE
## Q111015546 Q111015546 Q571 FALSE
## Q117783790 Q117783790 Q811430 FALSE
## Q125544306 Q125544306 Q47461344 FALSE
## Q125544313 Q125544313 Q3331189 FALSE
## Q131370779 Q131370779 Q79007 FALSE
Search for Wikipedia pages in all/some languages
Optional: instanceOF (limit to entities which are instance of a Wikidata class)
Count entities, or get the entities with that occupation, also get Wikipedia pages
Note: depending on connection speed, the nlimit parameter must be adjusted
## [1] 20684
lw <- w_SearchByOccupation(Qoc='Q2306091', mode='wikipedias') # lw=dataframe
# We can obtain the same information using previous function w_Wikipedias:
lw2 <- w_Wikipedias(entity_list=l)
# Verifying:
all(lw['Q10320558','pages'] == lw2['Q10320558','pages'])
# Verifying:
all(sort(strsplit(lw['Q9061', 'pages'], '|', fixed = T)[[1]]) ==
sort(strsplit(lw2['Q9061', 'pages'], '|', fixed = T)[[1]]))
Check if the Wikidata entities are valid. An entity is valid if it has a label or a description. If an entity exists but is not valid, it may be a redirect to another entity; in that case, the redirection is obtained. Other entities may have existed in the past but are currently deleted.
l2 <- append(l, c("Q115637688", "Q105660123")) # Note: adding two new entities
v <- w_isValid(l2)
# Not valid
## entity valid instanceof redirection
## Q105660123 Q105660123 FALSE Q97352588
## Q115637688 Q115637688 FALSE
Obtain properties of entity_list.
Get some properties of a Wikidata entity.
df <- w_EntityInfo(entity_list='Q134644', langsorder='es|en')
df <- w_EntityInfo(entity_list='Q134644', langsorder='es|en', wikilangs='es|en|fr')
df <- w_EntityInfo(c('Q270510', 'Q1675466', 'Q24871'), mode='film', langsorder='es|en', wikilangs='es|en|fr')
# Search string 'abba' inlabel
w <- w_SearchByLabel('abba', mode='inlabel', langsorder = '', instanceof = 'Q5')
df <- w_EntityInfo(w$entity, langsorder='en', wikilangs='en|es|fr', debug='info')
# Search 3D films
w <- w_SearchByInstanceof(instanceof='Q229390', langsorder = 'en|es', debug = 'info')
df <- w_EntityInfo(w$entity, mode="film", langsorder='en', wikilangs='en', debug='info')
Search articles that contain any of the words (note: it is better to use a long string)
Some search profiles:
Checks if titles are in a Wikimedia project and returns the Wikidata entity for them, if they have one.
Note that URLdecode(“a%CC%8C”) is the letter “a” with the combining caron (ǎ)
df <- m_reqMediaWiki(c('Max Planck', URLdecode("a%CC%8C"), 'Max', 'Cervante', 'humanist'),
mode='wikidataEntity', project='en.wikipedia.org')
Obtains the redirections of a page (the page itself can be a redirect to another page).
Returns a vector for each title; in each vector the first element is the destination, and the rest are all the pages that redirect to it.
a <- m_reqMediaWiki(c('Cervantes', 'Planck', 'Noexiste'), mode='redirects',
## $Cervantes
## [1] "Miguel de Cervantes" "Miguel de Cerbantes"
## [3] "Miguel de Cervantes y Saavedra" "Miguel De Cervantes y Saavedra"
## [5] "El manco de Lepanto" "Miguel de cervantes"
## [7] "Manco de Lepanto" "Don Miguel de Cervantes"
## [9] "Cervantino" "Cervantina"
## [11] "Miguel de Cervantes Saavedra" "Cervantes Saavedra, Miguel de"
## [13] "Miguel de Cervantes y Cortinas" "Cervantesco"
## [15] "Cervántico" "Cervantes"
## $Planck
## [1] "Max Planck" "Planck"
## [3] "Max Karl Ernst Ludwig Planck"
## $Noexiste
## [1] NA
Gets the URL of the primary image of Wikimedia pages.
Gets the URLs of all files inserted in the pages (images, sounds, videos…), using ‘|’ as separator, and excluding the extensions listed in the exclude_ext parameter.
Both functions automatically resolve redirects (the destination is the “normalized” column of the data frame returned).
Gets the visits that a page has had in a date interval
Optional: redirects
Obtains information (as vector) about an article in the Wikimedia project.
Infotype: articleinfo, prose, links
Optional: redirects
x <- m_XtoolsInfo(article="Cervantes", infotype="articleinfo", project="es.wikipedia.org")
xx <- m_XtoolsInfo(article="Cervantes", infotype="articleinfo", project="es.wikipedia.org",
y <- m_XtoolsInfo(article="Miguel de Cervantes", infotype="links", project="es.wikipedia.org")
yy <- m_XtoolsInfo(article="Cervantes", infotype="links", project="es.wikipedia.org",
Gets all information (articleinfo, prose, links).
Searches authors. (Sometimes the same author appears several times, under different names.)
Returns a data frame.
Important: The API returns a maximum of 10 records.
Search using CQL_Query
See https://www.oclc.org/developer/api/oclc-apis/viaf/authority-cluster.en.html
# Auxiliary function that extracts specific information from each record.
showVIAF <- function(r) {
i <- 0
for (j in r) {
i <- i+1
# Get viaf record
viaf <- j$record$recordData
viafid <- viaf$viafID
cat(paste0("-----------\nRecord #",i,"\nSources:\n"))
print(v_Extract(viaf, info='sources'))
cat("Gender: "); print(v_Extract(viaf, info='gender'))
cat("Dates: ") ; print(v_Extract(viaf, info='dates'))
cat('Occupations: '); print(v_Extract(viaf, info='occupations'))
cat("Titles: "); print(v_Extract(viaf, info='titles'))
cat("Wikipedias: "); print(v_Extract(viaf, info='wikipedias'))
Search in any field (cql.any)
Operator is “=”: searches for all the terms and only those:
CQL_Query <- 'cql.any = "García Iranzo, Juan"'
r <- v_Search(CQL_Query)
# r contains complete VIAF records (sometimes seen as a "cluster record",
# which is unified by combining records from many libraries around the world)
Search in 1xx, 4xx, 5xx fields of MARC record (local.names)
Operator is “all”: search all terms
## 0 0
Records found exceeds the maximum per request API limit : 250
Search in 100, 400, 500 fields of MARC record (local.personalNames)
Operator is “all”: search all terms
v_Search mode=allmainHeadingEl: 1xx fields of MARC record:
