How to use the validators.check_empty function in validators

To help you get started, we’ve selected a few validators examples based on popular ways the function is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github pnnl / socialsim / december-measurements / ContentCentricMeasurements.py View on Github external
    @check_empty(default=None)
    def getGiniCoef(self,nodeType='root', eventTypes=None, content_field="root"):
        '''
        Compute the Gini coefficient over this object's main data frame.
        Question #6,14,26

        Thin wrapper: forwards self.main_df followed by the three arguments
        below, unchanged and in this order, to self.getGiniCoefHelper and
        returns whatever that helper returns.

        Input: nodeType - Type of node to calculate the Gini coefficient over.
                          NOTE(review): earlier notes list "user" or "repo"
                          (case sensitive) but the default here is 'root';
                          confirm the accepted values against getGiniCoefHelper.
               eventTypes - A list of event types to include in the calculation
               content_field - passed through to getGiniCoefHelper as-is;
                          NOTE(review): its meaning is not visible from here.
        Output: g - gini coefficient (the helper's return value)
        '''
        frame = self.main_df
        gini = self.getGiniCoefHelper(frame, nodeType, eventTypes, content_field)
        return gini
github pnnl / socialsim / december-measurements / cascade_measurements.py View on Github external
    @check_empty(default=None)
    @check_root_only(default=None)
    def cascade_collection_participation_palma(self, community_grouper=None):
        """
        Palma ratio of how often each user appears in ``self.main_df``.

        :param community_grouper: optional column name. When falsy, one value
            is computed over all rows. When it names a column of
            ``self.main_df``, one value is computed per unique value of that
            column.
        :return: a single ``palma_ratio`` result, a dict mapping each community
            to its ``palma_ratio`` result, or None when ``community_grouper``
            is given but is not a column of ``self.main_df``.
        """

        def _palma_of(rows):
            # Count occurrences of each user and hand the counts to palma_ratio.
            users = rows[self.user_col].values
            return palma_ratio(list(Counter(users).values()))

        if not community_grouper:
            return _palma_of(self.main_df)

        if community_grouper not in self.main_df.columns:
            return None

        return {
            community: _palma_of(self.main_df[self.main_df[community_grouper] == community])
            for community in self.main_df[community_grouper].unique()
        }
github pnnl / socialsim / december-measurements / cascade_measurements.py View on Github external
    @check_empty(default=None)
    @check_root_only(default=0)
    def get_cascade_lifetime(self, granularity="D"):
        """
        Return the span between the earliest and latest value of the
        timestamp column of ``self.main_df``.

        The difference ``max - min`` is converted with ``.total_seconds()``
        when the result offers it; otherwise (e.g. plain numeric timestamps)
        the raw difference is used as the base value. The base value is then
        scaled according to ``granularity``.

        :param granularity: "s", "m", "H", "D"  [seconds/minutes/hours/days].
            Any other value leaves the base value unscaled.
        :return: the lifetime expressed in the requested unit
        """
        timestamps = self.main_df[self.timestamp_col]
        span = max(timestamps) - min(timestamps)
        try:
            lifetime = span.total_seconds()
        except AttributeError:
            # The difference is not a timedelta-like object, so fall back to
            # the raw difference. Only this case is recoverable; any other
            # error must propagate instead of being silently swallowed.
            lifetime = span

        # Successive divisions: seconds -> minutes -> hours -> days.
        if granularity in ["m", "H", "D"]:
            lifetime /= 60
        if granularity in ["H", "D"]:
            lifetime /= 60
        if granularity == "D":
            lifetime /= 24
        return lifetime
github pnnl / socialsim / december-measurements / cascade_measurements.py View on Github external
    @check_empty(default=None)
    def get_depth_based_measurements(self):
        """
        :return: pandas dataframe with "breadth", "size", "structural_virality", "unique_nodes", "new_node_ratio" at each depth
        """
        self.main_df["depth"] = -1
        self.reset_cascade()
        self.cascade.set_root_node(self.main_df)

        self.main_df.loc[self.main_df[self.node_col] == self.cascade.root_node, 'depth'] = 0

        seed_nodes = [self.cascade.root_node]
        depth = 1
        while len(seed_nodes) > 0:
            self.main_df.loc[(self.main_df[self.parent_node_col].isin(seed_nodes)) & (
                        self.main_df[self.node_col] != self.main_df[self.parent_node_col]), 'depth'] = depth
            seed_nodes = self.main_df[(self.main_df[self.parent_node_col].isin(seed_nodes)) & (
github pnnl / socialsim / december-measurements / cascade_measurements.py View on Github external
    @check_empty(default=None)
    @check_root_only(default=None)
    def cascade_participation_palma(self):
        """
        Return ``palma_ratio`` applied to the result of
        ``self.node_participation()``.
        """
        participation = self.node_participation()
        return palma_ratio(participation)
github pnnl / socialsim / december-measurements / cascade_measurements.py View on Github external
    @check_empty(default=None)
    def get_cascades_distribution_measurements(self):
        """
        :return: pandas dataframe with cascade identiifer and "depth", "breadth", "size", "structural_virality" and lifetime for each cascade in the population
        """
        cascades_distribution_measurements = []
        for cascade_identifier, scm in self.scms.items():
            cascades_distribution_measurements.append([cascade_identifier,
                                                       scm.community,
                                                       scm.cascade.get_cascade_depth(),
                                                       scm.cascade.get_cascade_size(),
                                                       scm.cascade.get_cascade_breadth(),
                                                       scm.cascade.get_cascade_structural_virality(),
                                                       scm.cascade.get_cascade_lifetime()
                                                       ])

        cols = ["rootID", "communityID", "depth", "size", "breadth", "structural_virality", "lifetime"]
github pnnl / socialsim / december-measurements / cascade_measurements.py View on Github external
    @check_empty(default=None)
    def cascade_collection_initialization_gini(self, community_grouper=None):
        """
        Gini coefficient of how often each user appears on root rows, i.e.
        rows of ``self.main_df`` whose node column equals the root-node column.

        :param community_grouper: optional column name. When falsy, one value
            is computed over all root rows. When it names a column of
            ``self.main_df``, one value is computed per unique value of that
            column.
        :return: a single Gini value (the ``.g`` attribute of the pysal Gini
            object), a dict mapping each community to its value, or None when
            ``community_grouper`` is given but is not a column of
            ``self.main_df``.
        """

        def _gini_of_users(rows):
            # Count occurrences of each user and feed the counts to pysal's Gini.
            users = rows[self.user_col].values
            return pysal.inequality.gini.Gini(list(Counter(users).values())).g

        if not community_grouper:
            is_root = self.main_df[self.node_col] == self.main_df[self.root_node_col]
            return _gini_of_users(self.main_df[is_root])

        if community_grouper not in self.main_df.columns:
            return None

        per_community = {}
        for community in self.main_df[community_grouper].unique():
            is_root = self.main_df[self.node_col] == self.main_df[self.root_node_col]
            in_community = self.main_df[community_grouper] == community
            per_community[community] = _gini_of_users(self.main_df[is_root & in_community])
        return per_community
github pnnl / socialsim / december-measurements / cascade_measurements.py View on Github external
    @check_empty(default=None)
    def cascade_collection_participation_gini(self, community_grouper=None):
        """
        Gini coefficient of how often each user appears in ``self.main_df``.

        :param community_grouper: optional column name. When falsy, one value
            is computed over all rows. When it names a column of
            ``self.main_df``, one value is computed per unique value of that
            column.
        :return: a single Gini value (the ``.g`` attribute of the pysal Gini
            object), a dict mapping each community to its value, or None when
            ``community_grouper`` is given but is not a column of
            ``self.main_df``.
        """

        def _gini_of_users(rows):
            # Count occurrences of each user and feed the counts to pysal's Gini.
            users = rows[self.user_col].values
            return pysal.inequality.gini.Gini(list(Counter(users).values())).g

        if not community_grouper:
            return _gini_of_users(self.main_df)

        if community_grouper not in self.main_df.columns:
            return None

        return {
            community: _gini_of_users(self.main_df[self.main_df[community_grouper] == community])
            for community in self.main_df[community_grouper].unique()
        }
github pnnl / socialsim / december-measurements / cascade_measurements.py View on Github external
    @check_empty(default=None)
    @check_root_only(default=1.0)
    def fraction_of_nodes_in_lcc(self, community_grouper=None):
        """
        Size of the largest cascade divided by the number of rows considered.

        NOTE(review): "lcc" presumably stands for largest connected component;
        confirm against the project documentation.

        :param community_grouper: optional column name. When falsy, the largest
            cascade size across all of ``self.scms`` is divided by
            ``len(self.main_df)``. When it names a column of ``self.main_df``,
            the same ratio is computed per unique value of that column, using
            only the scms whose ``community`` equals that value and only the
            rows belonging to it.
        :return: a single ratio, a dict mapping each community to its ratio,
            or None when ``community_grouper`` is given but is not a column of
            ``self.main_df``.
        """
        if not community_grouper:
            all_sizes = [scm.cascade.get_cascade_size() for scm in self.scms.values()]
            return max(all_sizes) / len(self.main_df)

        if community_grouper not in self.main_df.columns:
            return None

        fractions = {}
        for community in self.main_df[community_grouper].unique():
            community_sizes = [
                scm.cascade.get_cascade_size()
                for scm in self.scms.values()
                if scm.community == community
            ]
            largest = max(community_sizes)
            community_rows = self.main_df[self.main_df[community_grouper] == community]
            fractions[community] = largest / len(community_rows)
        return fractions
github pnnl / socialsim / december-measurements / cascade_measurements.py View on Github external
    @check_empty(default=None)
    def get_community_users_count_timeseries(self, time_granularity="M", community_grouper=None):
        """
        :param time_granularity: "Y", "M", "D", "H" [years/months/days/hours]
         :param community_grouper: column that indicates a community, eg. communityID, keyword
        :return: pandas dataframe with number of unique users who participate in start in that interval
        """
        temporal_measurements = []

        if community_grouper in self.main_df.columns:
            for community_identifier, community_df in self.main_df.groupby(community_grouper):
                cumul_df = None
                for ts, df in community_df.set_index(self.timestamp_col).groupby(pd.Grouper(freq=time_granularity),
                                                                                 sort=True):
                    if cumul_df is None:
                        cumul_df = df.copy()
                        old_unique_users_count = 0