Commit 1cb73094 authored by Julia Kaufhold
Browse files

Add all metadata from the skeleton; collect file PIDs for the new REST API at draft creation.

parent 895f9ab9
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import os
import pprint
import json
import simplejson
import requests
import logging.handlers
import argparse
import ConfigParser
import jsonpatch
from manifest import IRODSUtils
#from pyxb.binding.basis import element
logger = logging.getLogger('B2shareClient')
......@@ -21,30 +24,27 @@ class B2shareClient():
self.configuration = conf
#self.b2share_url = ( conf.b2share_scheme + "://" + conf.b2share_addr + conf.b2share_path)
################################################################################
def createDraft(self, community_name, title):
def createDraft(self, community_id, title, filePIDsList):
"""
Create a new record, in the draft state.
"""
if (community_name is None) | (community_name == ''):
communities_list = self.getAllCommunities()
logger.error("No communityName specified. Please select one of the communities names: " + str(communities_list.keys()))
print("No communityName specified. Please select one of the communities names: " + str(communities_list.keys()))
return None
community_id = self.getCommunityIDByName(community_name)
record_id = None
if community_id:
logger.debug("title: " + str(title) + ", token: " + self.configuration.access_token + ", community: " + str(community_id))
acces_part = self.configuration.access_parameter + "=" + self.configuration.access_token
create_draft_url = self.configuration.b2share_host_name + self.configuration.records_endpoint + acces_part
data = '{"titles":[{"title":"' + title + '"}], "community":"' \
+ community_id + '", "open_access":true, "community_specific": {}}'
# For the new B2SHARE API
# data = '{"titles":[{"title":"' + title + '"}], ' + \
# '"community":"' + community_id + '", ' + \
# '"file_pids:"' + filePIDsList + '", ' + \
# '"open_access":true, "community_specific": {}}'
data = '{"titles":[{"title":"' + title + '"}], ' + \
'"community":"' + community_id + '", ' + \
'"open_access":true, "community_specific": {}}'
headers = {"Content-Type":"application/json"}
print(create_draft_url + " : " + str(headers) + " : " + data)
draft = requests.post(url=create_draft_url, headers=headers, data=data)
print(str(draft.json()))
logger.debug("status code: " + str(draft.status_code))
if (str(draft.status_code) == "200") | (str(draft.status_code) == "201"):
record_id = draft.json()['id']
......@@ -52,23 +52,6 @@ class B2shareClient():
return record_id
logger.error("No record created: " + str(draft.json()))
return None
def getCommunityIDByName(self, community_name):
    """
    Look up the B2SHARE id of the community called ``community_name``.

    The community listing is fetched from the configured endpoint. The id of
    the last listed community whose name matches is returned, or None when
    no community matches.
    """
    conf = self.configuration
    token_query = conf.access_parameter + "=" + conf.access_token
    listing_url = conf.b2share_host_name + conf.list_communities_endpoint + token_query
    listing = requests.get(url=listing_url).json()["hits"]["hits"]
    # Every entry is inspected; when several share the name, the last one wins.
    matching_ids = [entry["id"] for entry in listing
                    if community_name == entry["name"]]
    return matching_ids[-1] if matching_ids else None
################################################################################
# Get iRODS metadata
def getB2safeMetadata(self):
......@@ -76,7 +59,7 @@ class B2shareClient():
return None
# Patch the draft with extra metadata
def addB2shareMetadata(self, record_id, communityName, metadata_path, irodsu):
def addB2shareMetadata(self, record_id, metadata_path, irodsu):
"""
This action updates the draft record with new information.
"""
......@@ -84,31 +67,67 @@ class B2shareClient():
patch_url = self.configuration.b2share_host_name + self.configuration.records_endpoint + record_id \
+ "/draft" + acces_part
#NOT WORKING
# JSON object from community schema
#src = self.getCommunitySchema(communityName)
# JSON object from the metadata file = community schema filled by user with data
#dst = str(irodsu.getFile(metadata_path))
#NOT WORKING
#ONLY FOR TEST, works with empty keywords field
# JSON object from the metadata file = community schema filled by user with data
metadata_lines = irodsu.getFile(metadata_path).split('\n\n')
draft_metadata = self.getDraftMetadata(record_id)
src = {}
dst = {'keywords': ["keyword1", "keyword2"]}
#ONLY FOR TEST
#get draft metadata as src and metadata file as dst
#src1 = self.getDraftMetadata(record_id)
#testsrc = src1['keywords']
#src = {'keywords': src1['keywords']}
#print(testsrc)
#dst1 = str(irodsu.getFile(metadata_path))
dst = {}
for line in metadata_lines[1:]:
line_content = line.split('\n')
if len(line_content) == 2:
option_name = line_content[0]
if option_name == 'community' or option_name == '':
logger.info("ignore the value for the 'community' as it's immutable and empty lines from meta data file")
else:
if option_name in draft_metadata.keys():
# src[option_name] = draft_metadata[option_name]
src[option_name] = ''
value = line_content[1]
if value.startswith('['):
value = value.replace('[','').replace(']','')
values_array = []
if value.startswith('{') :
if '},' in value:
#many objects in array
delimiter = '}'
arr = value.split(delimiter)
values_array.append(arr[0]+'}')
for elem in value.split(delimiter)[1:]:
values_array.append(elem[1:].strip()+'}')
else:
#one element in array
values_array.append(value)
else:
values_array = value.split(',')
array_of_objects = []
for array_element in values_array:
if '{' in array_element:
array_of_objects.append(self.valueToObject(array_element))
if array_of_objects:
values_array = array_of_objects
dst[option_name] = values_array
else:
if value.startswith('{'):
#object
value = self.valueToObject(value)
else:
#string
if value.lower() == "true":
value = True
else:
if value.lower() == "false":
value = False
dst[option_name] = value
patch = jsonpatch.make_patch(src, dst)
#print(patch)
headers = {"Content-Type": "application/json-patch+json"}
response = requests.patch(url=patch_url, headers=headers, data=str(patch))
print(response.text)
logger.debug("responce: " + str(response.text))
def valueToObject(self, value):
    """Decode the JSON text ``value`` and return the resulting Python object."""
    return json.loads(value)
def getDraftMetadata(self, record_id):
acces_part = self.configuration.access_parameter + "=" + self.configuration.access_token
......@@ -123,14 +142,14 @@ class B2shareClient():
community_id = communities_list[communityName]
community_endpoint = self.configuration.list_communities_endpoint
get_schema_endpoint = self.configuration.get_community_schema_endpoint
acces_part = self.configuration.access_parameter + "=" + self.configuration.access_token
acces_part = self.configuration.access_parameter + "=" + self.configuration.access_token
get_community_schema_url = self.configuration.b2share_host_name + community_endpoint\
+ community_id + get_schema_endpoint + acces_part
response = requests.get(url=get_community_schema_url)
return response.text
################################################################################
# Publish the record
# Publish the record
def publishRecord(self, record_id):
"""Publish a record in B2SHARE"""
acces_part = self.configuration.access_parameter + "=" + self.configuration.access_token
......@@ -139,15 +158,8 @@ class B2shareClient():
patch = '[{"op":"add", "path":"/publication_state", "value":"submitted"}]'
headers = {"Content-Type": "application/json-patch+json"}
response = requests.patch(url=publish_record_url, headers=headers, data=patch)
print(response.text)
################################################################################
def addFilesToDraft(self, record_id, collectionPath):
    """
    Placeholder for attaching file PIDs to an existing draft.

    Nothing is sent to B2SHARE yet; the method only logs that the
    operation is not possible and returns None.
    """
    # TODO: collect PIDs from the files' imeta in the collection
    # TODO: send the REST request to B2Share with the list of PIDs to update the record with record_id
    # NOTE(review): the other methods shown for this class log through the
    # module-level `logger`; confirm that `self.logger` is actually set.
    self.logger.info("addFilesToDraft not possible now")
logger.debug("responce: " + str(response.text))
################################################################################
#TODO add the function "Search drafts"
def getDraftByID(self, draft_id):
......@@ -156,23 +168,6 @@ class B2shareClient():
#TODO add the function "delete drafts"
def deleteDraft(self, draft_id):
    """Stub for deleting a draft: currently only emits an empty info log line."""
    logger.info('')
def getAllCommunities(self):
    """
    Fetch the community listing and return it as a ``{name: id}`` dict.

    If several communities share a name, the last one listed is kept.
    """
    conf = self.configuration
    token_query = conf.access_parameter + "=" + conf.access_token
    listing_url = conf.b2share_host_name + conf.list_communities_endpoint + token_query
    hits = requests.get(url=listing_url).json()["hits"]["hits"]
    return dict((hit["name"], hit["id"]) for hit in hits)
################################################################################
# Configuration Class #
......@@ -240,7 +235,7 @@ class Configuration():
else: return False
return opt
else:
self.logger.warning('missing parameter %s:%s' % (section,option))
logger.warning('missing parameter %s:%s' % (section,option))
return None
......@@ -256,12 +251,86 @@ def draft(args):
configuration.access_token = getAccessTokenWithConfigs(configuration)
b2shcl = B2shareClient(configuration)
record_id = b2shcl.createDraft(args.communityName, args.title)
logger.info("Drafting END")
filePIDsList = collectPIDsForCollection(args.collectionPath, configuration)
if args.communityID:
record_id = b2shcl.createDraft(args.communityID, args.title, filePIDsList)
else:
commID = getCommunityIDByName(configuration, args.communityName)
record_id = b2shcl.createDraft(commID, args.title, filePIDsList)
if record_id is not None:
logger.info("Drafting for record"+record_id+"END")
else:
logger.error("Drafting FAILED")
def getCommunityIDByName(configuration, community_name):
    """
    Resolve a B2SHARE community name to its id.

    Requests the community listing using the host, endpoint and access
    token held by ``configuration``. Returns the id of the last listed
    community named ``community_name``, or None if there is none.
    """
    url = (configuration.b2share_host_name
           + configuration.list_communities_endpoint
           + configuration.access_parameter + "=" + configuration.access_token)
    found = None
    for community in requests.get(url=url).json()["hits"]["hits"]:
        # No early exit: a later community with the same name overrides an earlier one.
        if community_name == community["name"]:
            found = community["id"]
    return found
def getAllCommunities(args):
    """
    Sub-command handler: print all B2SHARE communities with their ids.

    Builds the configuration from ``args.confpath``, ``args.debug`` and
    ``args.dryrun``, obtains the access token, queries the community
    listing and pretty-prints the resulting ``{name: id}`` mapping.
    """
    conf = Configuration(args.confpath, args.debug, args.dryrun, logger)
    conf.parseConf()
    conf.access_token = getAccessTokenWithConfigs(conf)
    token_query = conf.access_parameter + "=" + conf.access_token
    listing_url = conf.b2share_host_name + conf.list_communities_endpoint + token_query
    hits = requests.get(url=listing_url).json()["hits"]["hits"]
    communities = dict((hit["name"], hit["id"]) for hit in hits)
    print("List of communities and their id's: \n"+ pprint.pformat(communities))
def collectPIDsForCollection(collectionPath, configuration):
    """
    Collect the PIDs of the files found below an iRODS collection.

    The collection is listed with ``IRODSUtils.deepListDir``; for every file
    path derived from that listing the "PID" metadata attribute is read.

    Returns a list of JSON object strings of the form
    ``{"<file path>":"<PID>"}``. Files for which no PID is returned are
    left out, and an empty list is returned when the listing is empty.
    """
    pid_objects = []
    irodsu = IRODSUtils(configuration.irods_home_dir, logger, configuration.irods_debug)
    # The return code is not evaluated; only the listing decides whether to go on.
    _, tree = irodsu.deepListDir(collectionPath)
    if not tree:
        return pid_objects
    for file_path in collectFilePathsFromTree(tree):
        print(file_path)
        file_pid = irodsu.getMetadata(file_path, "PID")
        print(str(file_pid))
        if file_pid:
            # json.dumps escapes quotes and backslashes in the path or PID, which
            # plain concatenation did not; compact separators keep the
            # {"path":"pid"} layout for ordinary values.
            pid_objects.append(
                json.dumps({file_path: file_pid[0]}, separators=(',', ':')))
    return pid_objects
def collectFilePathsFromTree(filesTree):
    """
    Flatten a nested collection listing into a ``{full path: file name}`` dict.

    ``filesTree`` maps a collection path to a dict that holds the file names
    of that collection under the key ``'__files__'``; every other key of that
    dict is treated as a sub-collection with the same layout and is walked
    recursively. Colons in file names are replaced by ``"___"`` in both the
    resulting path and the stored name.

    The input tree is left untouched (the previous implementation deleted
    the ``'__files__'`` entries while walking it).
    """
    filePaths = {}
    for coll, content in filesTree.items():
        for fp in content['__files__']:
            fp = fp.replace(":", "___")
            filePaths[coll + os.sep + fp] = fp
        # Recurse over a filtered copy instead of deleting '__files__' in place.
        subcollections = dict((name, sub) for name, sub in content.items()
                              if name != '__files__')
        if subcollections:
            merged = collectFilePathsFromTree(subcollections)
            # Entries gathered so far take precedence over nested ones.
            merged.update(filePaths)
            filePaths = merged
    return filePaths
def addMetadata(args):
logger.info("Adding metadata ...")
configuration = Configuration(args.confpath, args.debug, args.dryrun, logger)
configuration.parseConf()
......@@ -269,27 +338,9 @@ def addMetadata(args):
configuration.access_token = irodsu.getMetadata(args.userName, "access_token", '-u')[0]
b2shcl = B2shareClient(configuration)
#system_metadata = b2shcl.getB2safeMetadata()
b2shcl.addB2shareMetadata(args.record_id, args.commName, args.metadata, irodsu)
b2shcl.addB2shareMetadata(args.record_id, args.metadata, irodsu)
logger.info("Added metadata")
def addFilePIDsToDraft(args):
    """
    Sub-command handler: attach the file PIDs of a collection to a draft.

    Builds the configuration from ``args.confpath``, ``args.debug`` and
    ``args.dryrun``, obtains the access token and delegates to
    ``B2shareClient.addFilesToDraft`` with ``args.rec_id`` and
    ``args.collectionPath``.
    """
    logger.info("Adding file names to the draft ...")
    configuration = Configuration(args.confpath, args.debug, args.dryrun, logger)
    configuration.parseConf()
    configuration.access_token = getAccessTokenWithConfigs(configuration)
    # TODO: update the draft with the list of file names/paths;
    # a B2Share REST API endpoint is needed for that.
    b2shcl = B2shareClient(configuration)
    # addFilesToDraft takes (record_id, collectionPath); the former call passed
    # an extra leading argument and would have raised a TypeError.
    b2shcl.addFilesToDraft(args.rec_id, args.collectionPath)
    logger.info("File names successfully added to draft.")
def publish(args):
logger.info("Publishing ...")
......@@ -304,7 +355,7 @@ def publish(args):
def getAccessTokenWithConfigs(configuration):
    """
    Read the B2SHARE access token from the user's iRODS metadata.

    An ``IRODSUtils`` instance is created from ``configuration`` and the
    first value of the user's "access_token" attribute is returned.
    """
    irodsu = IRODSUtils(configuration.irods_home_dir, logger, configuration.irods_debug)
    # NOTE: the user name is taken from the module-level `args` that is set in
    # the __main__ section, not from `configuration`.
    # TODO: "access_token" as config variable?
    # The metadata is looked up once; the lookup used to be issued twice in a row.
    return irodsu.getMetadata(args.userName, "access_token", '-u')[0]
if __name__ == "__main__":
......@@ -317,26 +368,29 @@ if __name__ == "__main__":
parser.add_argument("-u", "--userName", help="iRODS user name")
subparsers = parser.add_subparsers(help='sub-command help', dest='subcmd')
parser_list_comm = subparsers.add_parser('listCommunities', help="List all communities with their names and id's")
parser_list_comm.set_defaults(func=getAllCommunities)
parser_draft = subparsers.add_parser('draft', help='Create a draft record')
parser_draft.add_argument('-c', '--communityName', required=True, help='B2Share community name')
input_group = parser_draft.add_mutually_exclusive_group(required=True)
input_group.add_argument("-c", "--communityName", help="B2Share community name")
input_group.add_argument("-i", "--communityID", help="B2Share community id")
parser_draft.add_argument('-ti', '--title', help='title of the record')
parser_draft.add_argument('-cp', '--collectionPath', required=True, help='path to the collection in iRODS with files')
parser_draft.set_defaults(func=draft)
parser_meta = subparsers.add_parser('meta', help='Add metadata to the draft')
parser_meta.add_argument('-id', '--record_id', required=True, help='the b2share id of the record')
parser_meta.add_argument('--commName', required=True, help='B2Share community name')
input_group = parser_meta.add_mutually_exclusive_group(required=True)
input_group.add_argument("-cn", "--commName", help="B2Share community name")
input_group.add_argument("-ci", "--commID", help="B2Share community id")
parser_meta.add_argument('-md', '--metadata', required=True, help='path to the metadata JSON file of the record')
parser_meta.set_defaults(func=addMetadata)
parser_file_pids = subparsers.add_parser('addFilePIDs', help='add file PIDs to the draft')
parser_file_pids.add_argument('-pi', '--rec_id', required=True, help='the b2share id of the record')
parser_file_pids.add_argument('-cn', '--collectionPath', required=True, help='path to the collection in iRODS with files')
parser_file_pids.set_defaults(func=addFilePIDsToDraft)
parser_pub = subparsers.add_parser('pub', help='publish the draft')
parser_pub.add_argument('-pi', '--rec_id', required=True, help='the b2share id of the record')
parser_pub.set_defaults(func=publish)
args = parser.parse_args()
args.func(args)
......@@ -2,10 +2,7 @@
# -*- coding: utf-8 -*-
import requests
import json
import os
import logging
import logging.handlers
import argparse
import ConfigParser
......@@ -92,10 +89,10 @@ def getAllCommunities(configuration):
communities = {}
for community_object in communities_list:
name = community_object["name"]
id = community_object["id"]
communities[name] = id
community_id = community_object["id"]
communities[name] = community_id
return communities
return communities
def create_md_schema(args):
......@@ -125,21 +122,29 @@ def create_md_schema(args):
response = requests.get(url=get_community_schema_url)
#Maybe parse this for the manifest extension
#community_schema = response.json()["json_schema"]["allOf"][0]
#print(response.json())
#print(community_schema)
#then the ["properties"] dictionary with all property objects
#and ["required"] the list of required ones
community_schema = response.json()["json_schema"]["allOf"][0]
requiredProperties = community_schema["b2share"]["presentation"]["major"]
optionalProperties = community_schema["b2share"]["presentation"]["minor"]
mdPatchSceleton = '#please fill out at least the required fields with values as JSON strings in the line after the property name.' + "\n"
mdPatchSceleton = mdPatchSceleton + '#e.g. for community - EUDAT, for open_access - true, for contributors - '+ \
'[{"contributor_name":"Hulk", "contributor_type": "Editor"}, {"contributor_name":"Banner", "contributor_type": "ContactPerson"}]' + "\n\n"
mdPatchSceleton = mdPatchSceleton + "[required]" + "\n"
for requiredProperty in requiredProperties:
mdPatchSceleton = mdPatchSceleton + requiredProperty + "\n\n"
mdPatchSceleton = mdPatchSceleton + "\n" + "[optional]" + "\n"
for optionalPropertiy in optionalProperties:
mdPatchSceleton = mdPatchSceleton + optionalPropertiy + "\n\n"
if args.dryrun:
print(str(response.text))
print(mdPatchSceleton)
else:
logger.info('Writing the metadata to a file')
file_path = args.collectionName + "/" + "b2share_metadata.json" #TODO: config, argument?
file_path = args.collectionName + os.sep + "b2share_metadata.json" #TODO: config, argument?
temp = tempfile.NamedTemporaryFile()
try:
temp.write(str(response.text))
temp.write(mdPatchSceleton)
temp.flush()
try:
irodsu.putFile(temp.name, file_path, configuration.irods_resource)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment