Skip to content

Commit

Permalink
Improve ckan harvester
Browse files Browse the repository at this point in the history
- Fix duplicate list default_values from config.
  • Loading branch information
mjanez committed Jul 29, 2024
1 parent 8ad1c74 commit 3beccce
Show file tree
Hide file tree
Showing 3 changed files with 17 additions and 15 deletions.
7 changes: 2 additions & 5 deletions ckanext/schemingdcat/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,9 @@
metadata_templates_search_identifier = 'schemingdcat_xls-template'
mimetype_base_uri = 'http://www.iana.org/assignments/media-types'
slugify_pat = re.compile('[^a-zA-Z0-9]')

# schemingdcat field_mapping extras field_names
# schemingdcat field_mapping extras prefix, e.g. custom_field = extras_custom_field
field_mapping_extras_prefix = 'extras'
field_mapping_extras_prefix_symbol = '_'
field_mapping_extras_prefix_list = 'extras'
field_mapping_extras_prefix = field_mapping_extras_prefix_list + field_mapping_extras_prefix_symbol


# Default DCAT metadata configuration
OGC2CKAN_HARVESTER_MD_CONFIG = {
Expand Down
22 changes: 15 additions & 7 deletions ckanext/schemingdcat/harvesters/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,6 @@
slugify_pat,
field_mapping_extras_prefix,
field_mapping_extras_prefix_symbol,
field_mapping_extras_prefix_list
)

log = logging.getLogger(__name__)
Expand Down Expand Up @@ -540,8 +539,8 @@ def apply_field_mapping(d, mapping):
if 'field_name' in remote_info:
remote_field = remote_info['field_name']
if remote_field and remote_field.startswith(field_mapping_extras_prefix):
extra_key = remote_field.split(field_mapping_extras_prefix_symbol, 1)[1]
extra_value = get_extra_value(d.get(field_mapping_extras_prefix_list, []), extra_key)
extra_key = remote_field.split(field_mapping_extras_prefix + field_mapping_extras_prefix_symbol, 1)[1]
extra_value = get_extra_value(d.get(field_mapping_extras_prefix, []), extra_key)
if extra_value is not None:
new_dict[local_field] = extra_value
elif remote_field in d:
Expand All @@ -553,8 +552,8 @@ def apply_field_mapping(d, mapping):
if 'field_name' in lang_info:
remote_field = lang_info['field_name']
if remote_field and remote_field.startswith(field_mapping_extras_prefix):
extra_key = remote_field.split(field_mapping_extras_prefix_symbol, 1)[1]
extra_value = get_extra_value(d.get(field_mapping_extras_prefix_list, []), extra_key)
extra_key = remote_field.split(field_mapping_extras_prefix + field_mapping_extras_prefix_symbol, 1)[1]
extra_value = get_extra_value(d.get(field_mapping_extras_prefix, []), extra_key)
if extra_value is not None:
if local_field not in new_dict:
new_dict[local_field] = {}
Expand Down Expand Up @@ -1398,6 +1397,7 @@ def update_dict_with_defaults(target_dict, default_values):
target_dict[key] = default_value
elif isinstance(target_dict[key], list) and isinstance(default_value, list):
target_dict[key].extend(default_value)
target_dict[key] = list(set(target_dict[key]))
elif isinstance(default_value, dict):
target_dict[key] = target_dict.get(key, {})
for subkey, subvalue in default_value.items():
Expand Down Expand Up @@ -1492,8 +1492,8 @@ def _set_package_dict_default_values(self, package_dict, harvest_object, context
# Prepare tags
package_dict, existing_tags_ids = self._set_ckan_tags(package_dict)

#TODO: Fix existing_tags_ids
log.debug('TODO:existing_tags_ids: %s', existing_tags_ids)
# Existing_tags_ids
log.debug('existing_tags_ids: %s', existing_tags_ids)

# Set default tags if needed
default_tags = self.config.get("default_tags", [])
Expand All @@ -1516,6 +1516,14 @@ def _set_package_dict_default_values(self, package_dict, harvest_object, context

package_dict["groups"] = cleaned_groups

# Remove duplicates in list or dictionary fields
for key, value in package_dict.items():
if key not in ['groups', 'resources', 'tags']:
if isinstance(value, list):
package_dict[key] = list({json.dumps(item): item for item in value}.values())
elif isinstance(value, dict):
package_dict[key] = {k: v for k, v in value.items()}

# log.debug('package_dict default values: %s', package_dict)
return package_dict

Expand Down
3 changes: 0 additions & 3 deletions ckanext/schemingdcat/harvesters/ckan.py
Original file line number Diff line number Diff line change
Expand Up @@ -630,9 +630,6 @@ def get_package_dict(self, harvest_object, context, package_dict=None):
Returns:
dict: The package dictionary with translated fields and default values set.
"""
# Add default values: tags, groups, etc.
package_dict = self._set_package_dict_default_values(package_dict, harvest_object, context)

# Update unique ids
for resource in package_dict['resources']:
resource['alternate_identifier'] = resource['id']
Expand Down

0 comments on commit 3beccce

Please sign in to comment.